Skip to content

Commit a3bd58b

Browse files
committed
[improvement](statistics)External table getRowCount return -1 when row count is not available or row count is 0. (#43009)
External table getRowCount returns -1 when the row count is not available or is 0, so that the behavior of external tables matches that of internal OLAP tables.
1 parent 2f33dd5 commit a3bd58b

11 files changed

Lines changed: 119 additions & 31 deletions

File tree

fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ public enum OlapTableState {
119119
WAITING_STABLE
120120
}
121121

122-
public static long ROW_COUNT_BEFORE_REPORT = -1;
123-
124122
private volatile OlapTableState state;
125123

126124
// index id -> index meta
@@ -1298,12 +1296,12 @@ public long getRowCountForIndex(long indexId, boolean strict) {
12981296
if (index == null) {
12991297
LOG.warn("Index {} not exist in partition {}, table {}, {}",
13001298
indexId, entry.getValue().getName(), id, name);
1301-
return ROW_COUNT_BEFORE_REPORT;
1299+
return UNKNOWN_ROW_COUNT;
13021300
}
13031301
if (strict && !index.getRowCountReported()) {
1304-
return ROW_COUNT_BEFORE_REPORT;
1302+
return UNKNOWN_ROW_COUNT;
13051303
}
1306-
rowCount += index.getRowCount() == -1 ? 0 : index.getRowCount();
1304+
rowCount += index.getRowCount() == UNKNOWN_ROW_COUNT ? 0 : index.getRowCount();
13071305
}
13081306
return rowCount;
13091307
}

fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,6 @@ public List<Long> getChunkSizes() {
614614

615615
@Override
616616
public long fetchRowCount() {
617-
return 0;
617+
return UNKNOWN_ROW_COUNT;
618618
}
619619
}

fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
public interface TableIf {
4646
Logger LOG = LogManager.getLogger(TableIf.class);
4747

48+
long UNKNOWN_ROW_COUNT = -1;
49+
4850
void readLock();
4951

5052
boolean tryReadLock(long timeout, TimeUnit unit);

fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ public long getRowCount() {
290290
makeSureInitialized();
291291
} catch (Exception e) {
292292
LOG.warn("Failed to initialize table {}.{}.{}", catalog.getName(), dbName, name, e);
293-
return 0;
293+
return TableIf.UNKNOWN_ROW_COUNT;
294294
}
295295
// All external table should get external row count from cache.
296296
return Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(), dbId, id);
@@ -302,7 +302,7 @@ public long getRowCount() {
302302
* This is called by ExternalRowCountCache to load row count cache.
303303
*/
304304
public long fetchRowCount() {
305-
return 0;
305+
return UNKNOWN_ROW_COUNT;
306306
}
307307

308308
@Override

fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.doris.catalog.HudiUtils;
2424
import org.apache.doris.catalog.PrimitiveType;
2525
import org.apache.doris.catalog.ScalarType;
26+
import org.apache.doris.catalog.TableIf;
2627
import org.apache.doris.catalog.Type;
2728
import org.apache.doris.datasource.HMSExternalCatalog;
2829
import org.apache.doris.datasource.hive.HiveMetaStoreCache;
@@ -308,9 +309,9 @@ private long getRowCountFromExternalSource() {
308309
break;
309310
default:
310311
LOG.warn("getRowCount for dlaType {} is not supported.", dlaType);
311-
rowCount = -1;
312+
rowCount = TableIf.UNKNOWN_ROW_COUNT;
312313
}
313-
return rowCount;
314+
return rowCount > 0 ? rowCount : UNKNOWN_ROW_COUNT;
314315
}
315316

316317
@Override
@@ -477,7 +478,7 @@ public long fetchRowCount() {
477478
// Get row count from hive metastore property.
478479
long rowCount = getRowCountFromExternalSource();
479480
// Only hive table supports estimate row count by listing file.
480-
if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) {
481+
if (rowCount == UNKNOWN_ROW_COUNT && dlaType.equals(DLAType.HIVE)) {
481482
LOG.debug("Will estimate row count from file list.");
482483
rowCount = StatisticsUtil.getRowCountFromFileList(this);
483484
}

fe/fe-core/src/main/java/org/apache/doris/catalog/external/PaimonExternalTable.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,10 @@ public long fetchRowCount() {
175175
for (Split split : splits) {
176176
rowCount += split.rowCount();
177177
}
178-
return rowCount;
178+
return rowCount > 0 ? rowCount : UNKNOWN_ROW_COUNT;
179179
} catch (Exception e) {
180180
LOG.warn("Fail to collect row count for db {} table {}", dbName, name, e);
181181
}
182-
return -1;
182+
return UNKNOWN_ROW_COUNT;
183183
}
184184
}

fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ protected Optional<Long> doLoad(RowCountKey rowCountKey) {
9494
}
9595

9696
/**
97-
* Get cached row count for the given table. Return 0 if cached not loaded or table not exists.
97+
* Get cached row count for the given table. Return -1 if cached not loaded or table not exists.
9898
* Cached will be loaded async.
9999
* @param catalogId
100100
* @param dbId
@@ -106,12 +106,12 @@ public long getCachedRowCount(long catalogId, long dbId, long tableId) {
106106
try {
107107
CompletableFuture<Optional<Long>> f = rowCountCache.get(key);
108108
if (f.isDone()) {
109-
return f.get().orElse(0L);
109+
return f.get().orElse(TableIf.UNKNOWN_ROW_COUNT);
110110
}
111111
} catch (Exception e) {
112112
LOG.warn("Unexpected exception while returning row count", e);
113113
}
114-
return 0;
114+
return TableIf.UNKNOWN_ROW_COUNT;
115115
}
116116

117117
}

fe/fe-core/src/main/java/org/apache/doris/external/iceberg/util/IcebergUtils.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.doris.analysis.Subquery;
3737
import org.apache.doris.catalog.Column;
3838
import org.apache.doris.catalog.Env;
39+
import org.apache.doris.catalog.TableIf;
3940
import org.apache.doris.catalog.Type;
4041
import org.apache.doris.common.Config;
4142
import org.apache.doris.common.DdlException;
@@ -616,15 +617,18 @@ public static long getIcebergRowCount(ExternalCatalog catalog, String dbName, St
616617
.getIcebergTable(catalog, dbName, tbName);
617618
Snapshot snapshot = icebergTable.currentSnapshot();
618619
if (snapshot == null) {
620+
LOG.info("Iceberg table {}.{}.{} is empty, return -1.", catalog.getName(), dbName, tbName);
619621
// empty table
620-
return 0;
622+
return TableIf.UNKNOWN_ROW_COUNT;
621623
}
622624
Map<String, String> summary = snapshot.summary();
623-
return Long.parseLong(summary.get(TOTAL_RECORDS)) - Long.parseLong(summary.get(TOTAL_POSITION_DELETES));
625+
long rows = Long.parseLong(summary.get(TOTAL_RECORDS)) - Long.parseLong(summary.get(TOTAL_POSITION_DELETES));
626+
LOG.info("Iceberg table {}.{}.{} row count in summary is {}", catalog.getName(), dbName, tbName, rows);
627+
return rows;
624628
} catch (Exception e) {
625629
LOG.warn("Fail to collect row count for db {} table {}", dbName, tbName, e);
626630
}
627-
return -1;
631+
return TableIf.UNKNOWN_ROW_COUNT;
628632
}
629633

630634
}

fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ protected void createAnalyzeJobForTbl(DatabaseIf<? extends TableIf> db,
181181
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
182182
if (table instanceof OlapTable && analysisMethod.equals(AnalysisMethod.SAMPLE)) {
183183
OlapTable ot = (OlapTable) table;
184-
if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) == OlapTable.ROW_COUNT_BEFORE_REPORT) {
184+
if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) == TableIf.UNKNOWN_ROW_COUNT) {
185185
LOG.info("Table {} row count is not fully reported, skip auto analyzing this time.", ot.getName());
186186
return;
187187
}

fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -562,18 +562,19 @@ public static int getTableHealth(long totalRows, long updatedRows) {
562562
public static long getHiveRowCount(HMSExternalTable table) {
563563
Map<String, String> parameters = table.getRemoteTable().getParameters();
564564
if (parameters == null) {
565-
return -1;
565+
return TableIf.UNKNOWN_ROW_COUNT;
566566
}
567567
// Table parameters contains row count, simply get and return it.
568568
if (parameters.containsKey(NUM_ROWS)) {
569569
long rows = Long.parseLong(parameters.get(NUM_ROWS));
570570
// Sometimes, the NUM_ROWS in hms is 0 but actually is not. Need to check TOTAL_SIZE if NUM_ROWS is 0.
571-
if (rows != 0) {
571+
if (rows > 0) {
572+
LOG.info("Get row count {} for hive table {} in table parameters.", rows, table.getName());
572573
return rows;
573574
}
574575
}
575576
if (!parameters.containsKey(TOTAL_SIZE)) {
576-
return -1;
577+
return TableIf.UNKNOWN_ROW_COUNT;
577578
}
578579
// Table parameters doesn't contain row count but contain total size. Estimate row count : totalSize/rowSize
579580
long totalSize = Long.parseLong(parameters.get(TOTAL_SIZE));
@@ -582,9 +583,13 @@ public static long getHiveRowCount(HMSExternalTable table) {
582583
estimatedRowSize += column.getDataType().getSlotSize();
583584
}
584585
if (estimatedRowSize == 0) {
585-
return -1;
586+
LOG.warn("Hive table {} estimated row size is invalid {}", table.getName(), estimatedRowSize);
587+
return TableIf.UNKNOWN_ROW_COUNT;
586588
}
587-
return totalSize / estimatedRowSize;
589+
long rows = totalSize / estimatedRowSize;
590+
LOG.debug("Get row count {} for hive table {} by total size {} and row size {}",
591+
rows, table.getName(), totalSize, estimatedRowSize);
592+
return rows;
588593
}
589594

590595
/**
@@ -608,7 +613,7 @@ public static long getTotalSizeFromHMS(HMSExternalTable table) {
608613
*/
609614
public static long getRowCountFromFileList(HMSExternalTable table) {
610615
if (table.isView()) {
611-
return 0;
616+
return TableIf.UNKNOWN_ROW_COUNT;
612617
}
613618
HiveMetaStoreCache.HivePartitionValues partitionValues = getPartitionValuesForTable(table);
614619
int totalPartitionSize = partitionValues == null ? 1 : partitionValues.getIdToPartitionItem().size();
@@ -635,12 +640,13 @@ public static long getRowCountFromFileList(HMSExternalTable table) {
635640
estimatedRowSize += column.getDataType().getSlotSize();
636641
}
637642
if (estimatedRowSize == 0) {
638-
return 0;
643+
return TableIf.UNKNOWN_ROW_COUNT;
639644
}
640645
if (samplePartitionSize < totalPartitionSize) {
641646
totalSize = totalSize * totalPartitionSize / samplePartitionSize;
642647
}
643-
return totalSize / estimatedRowSize;
648+
long rows = totalSize / estimatedRowSize;
649+
return rows > 0 ? rows : TableIf.UNKNOWN_ROW_COUNT;
644650
}
645651

646652
public static HiveMetaStoreCache.HivePartitionValues getPartitionValuesForTable(HMSExternalTable table) {

0 commit comments

Comments
 (0)