Skip to content

Commit aa800ef

Browse files
authored
HIVE-29275: Stats autogather calculates the min statistic incorrectly (#6194)
1 parent 87a503f commit aa800ef

File tree

11 files changed, with 168 additions and 134 deletions

11 files changed, with 168 additions and 134 deletions

ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
4444
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
4545
import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;
46+
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
4647
import org.apache.hadoop.hive.ql.exec.Utilities;
4748
import org.apache.hadoop.hive.ql.metadata.HiveException;
4849
import org.apache.hadoop.hive.ql.session.SessionState;
@@ -54,7 +55,6 @@
5455
import java.io.IOException;
5556
import java.io.OutputStream;
5657
import java.io.OutputStreamWriter;
57-
import java.math.BigInteger;
5858
import java.nio.charset.StandardCharsets;
5959
import java.time.ZoneId;
6060
import java.util.ArrayList;
@@ -233,12 +233,7 @@ public static String[] extractColumnValues(FieldSchema column, boolean isColumnS
233233
}
234234

235235
public static String convertToString(Decimal val) {
236-
if (val == null) {
237-
return "";
238-
}
239-
240-
HiveDecimal result = HiveDecimal.create(new BigInteger(val.getUnscaled()), val.getScale());
241-
return (result != null) ? result.toString() : "";
236+
return MetaStoreServerUtils.decimalToString(val);
242237
}
243238

244239
public static String convertToString(org.apache.hadoop.hive.metastore.api.Date val) {

ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -136,25 +136,25 @@ STAGE PLANS:
136136
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
137137
Filter Operator
138138
predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
139-
Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE
139+
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
140140
Top N Key Operator
141141
sort order: ++
142142
keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: decimal(38,5))
143143
null sort order: zz
144-
Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE
144+
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
145145
top n: 2
146146
Group By Operator
147147
keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: decimal(38,5))
148148
minReductionHashAggr: 0.99
149149
mode: hash
150150
outputColumnNames: _col0, _col1
151-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
151+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
152152
Reduce Output Operator
153153
key expressions: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5))
154154
null sort order: zz
155155
sort order: ++
156156
Map-reduce partition columns: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5))
157-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
157+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
158158
Execution mode: vectorized, llap
159159
LLAP IO: all inputs
160160
Map Vectorization:
@@ -179,13 +179,13 @@ STAGE PLANS:
179179
keys: KEY._col0 (type: decimal(10,2)), KEY._col1 (type: decimal(38,5))
180180
mode: mergepartial
181181
outputColumnNames: _col0, _col1
182-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
182+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
183183
Limit
184184
Number of rows: 2
185-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
185+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
186186
File Output Operator
187187
compressed: false
188-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
188+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
189189
table:
190190
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
191191
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -243,25 +243,25 @@ STAGE PLANS:
243243
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
244244
Filter Operator
245245
predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean)
246-
Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE
246+
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
247247
Top N Key Operator
248248
sort order: ++
249249
keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: decimal(38,5))
250250
null sort order: zz
251-
Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE
251+
Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE
252252
top n: 2
253253
Group By Operator
254254
keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: decimal(38,5))
255255
minReductionHashAggr: 0.99
256256
mode: hash
257257
outputColumnNames: _col0, _col1
258-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
258+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
259259
Reduce Output Operator
260260
key expressions: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5))
261261
null sort order: zz
262262
sort order: ++
263263
Map-reduce partition columns: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5))
264-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
264+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
265265
Execution mode: vectorized, llap
266266
LLAP IO: all inputs
267267
Map Vectorization:
@@ -287,13 +287,13 @@ STAGE PLANS:
287287
keys: KEY._col0 (type: decimal(10,2)), KEY._col1 (type: decimal(38,5))
288288
mode: mergepartial
289289
outputColumnNames: _col0, _col1
290-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
290+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
291291
Limit
292292
Number of rows: 2
293-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
293+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
294294
File Output Operator
295295
compressed: false
296-
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
296+
Statistics: Num rows: 2 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
297297
table:
298298
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
299299
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

ql/src/test/results/clientpositive/llap/stats_histogram.q.out

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -361,7 +361,7 @@ POSTHOOK: type: DESCTABLE
361361
POSTHOOK: Input: default@test_stats
362362
col_name e
363363
data_type decimal(5,2)
364-
min -10.2
364+
min -123.2
365365
max 12.2
366366
num_nulls 1
367367
distinct_count 11

ql/src/test/results/clientpositive/llap/stats_histogram_null.q.out

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -436,7 +436,7 @@ POSTHOOK: type: DESCTABLE
436436
POSTHOOK: Input: default@test_stats
437437
col_name e
438438
data_type decimal(5,2)
439-
min -12.3
439+
min -123.2
440440
max 12.2
441441
num_nulls 1
442442
distinct_count 15

0 commit comments

Comments
 (0)