Skip to content

Commit

Permalink
[improvement](statistics)Remove useless stats validation check. (#43279)
Browse files Browse the repository at this point in the history
Remove useless stats validation check.
Previously, we did not load the column stats when the table's rowCount > 0,
ndv == 0, and nullCount != rowCount. This was done to avoid using invalid
stats.
Now we remove this validation because the planner side has added its own
validation of the column stats (see #41790).
Besides, after removing the validation, it is easier to add regression
tests using stats injection.
  • Loading branch information
Jibing-Li committed Nov 11, 2024
1 parent a843ca3 commit 68006e8
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,6 @@ public String toSQL(boolean roundByParentheses) {
}

public ColumnStatistic toColumnStatistic() {
// For non-empty table, return UNKNOWN if we can't collect ndv value.
// Because inaccurate ndv is very misleading.
if (count > 0 && ndv == 0 && count != nullCount) {
return ColumnStatistic.UNKNOWN;
}
try {
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder();
columnStatisticBuilder.setCount(count);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
// it will trigger load function again without cache an empty value.
return null;
}
if (columnStatistic.isPresent()) {
// For non-empty table, return UNKNOWN if we can't collect ndv value.
// Because inaccurate ndv is very misleading.
ColumnStatistic stats = columnStatistic.get();
if (stats.count > 0 && stats.ndv == 0 && stats.count != stats.numNulls) {
columnStatistic = Optional.of(ColumnStatistic.UNKNOWN);
}
}
return columnStatistic;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,7 @@ private void doPreHeat() {
long tblId = statsId.tblId;
long idxId = statsId.idxId;
String colId = statsId.colId;
final StatisticsCacheKey k =
new StatisticsCacheKey(tblId, idxId, colId);
ColumnStatistic c = ColumnStatistic.fromResultRow(r);
if (c.count > 0 && c.ndv == 0 && c.count != c.numNulls) {
c = ColumnStatistic.UNKNOWN;
}
final StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colId);
putCache(k, c);
} catch (Throwable t) {
LOG.warn("Error when preheating stats cache. reason: [{}]. Row:[{}]", t.getMessage(), r);
Expand Down
4 changes: 1 addition & 3 deletions regression-test/suites/statistics/analyze_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2706,9 +2706,7 @@ PARTITION `p599` VALUES IN (599)
alter_result = sql """show column stats alter_test(id)"""
assertEquals(1, alter_result.size())
alter_result = sql """show column cached stats alter_test(id)"""
assertEquals(0, alter_result.size())
alter_result = sql """show column cached stats alter_test(id)"""
assertEquals(0, alter_result.size())
assertEquals(1, alter_result.size())
sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='100', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');"""
alter_result = sql """show column stats alter_test(id)"""
assertEquals(1, alter_result.size())
Expand Down

0 comments on commit 68006e8

Please sign in to comment.