Skip to content

Commit aef3b54

Browse files
authored
branch-3.0-pick: [Fix](partial update) Fix incorrect result when partial update include delete sign columns #46194 (#46336)
pick #46194
1 parent b76afc0 commit aef3b54

13 files changed

+408
-44
lines changed

be/src/olap/base_tablet.cpp

+6-8
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,7 @@ Status BaseTablet::generate_new_block_for_partial_update(
931931
// read current rowset first, if a row in the current rowset has delete sign mark
932932
// we don't need to read values from old block
933933
RETURN_IF_ERROR(read_plan_update.read_columns_by_plan(
934-
*rowset_schema, update_cids, rsid_to_rowset, update_block, &read_index_update));
934+
*rowset_schema, update_cids, rsid_to_rowset, update_block, &read_index_update, false));
935935
size_t update_rows = read_index_update.size();
936936
for (auto i = 0; i < update_cids.size(); ++i) {
937937
for (auto idx = 0; idx < update_rows; ++idx) {
@@ -951,19 +951,17 @@ Status BaseTablet::generate_new_block_for_partial_update(
951951
// rowid in the final block (starts from 0 and increases; may not be continuous because we skip reading some rows) -> rowid to read in old_block
952952
std::map<uint32_t, uint32_t> read_index_old;
953953
RETURN_IF_ERROR(read_plan_ori.read_columns_by_plan(*rowset_schema, missing_cids, rsid_to_rowset,
954-
old_block, &read_index_old,
954+
old_block, &read_index_old, true,
955955
new_block_delete_signs));
956956
size_t old_rows = read_index_old.size();
957957
const auto* __restrict old_block_delete_signs =
958958
get_delete_sign_column_data(old_block, old_rows);
959-
959+
DCHECK(old_block_delete_signs != nullptr);
960960
// build default value block
961961
auto default_value_block = old_block.clone_empty();
962-
if (old_block_delete_signs != nullptr || new_block_delete_signs != nullptr) {
963-
RETURN_IF_ERROR(BaseTablet::generate_default_value_block(
964-
*rowset_schema, missing_cids, partial_update_info->default_values, old_block,
965-
default_value_block));
966-
}
962+
RETURN_IF_ERROR(BaseTablet::generate_default_value_block(*rowset_schema, missing_cids,
963+
partial_update_info->default_values,
964+
old_block, default_value_block));
967965
auto mutable_default_value_columns = default_value_block.mutate_columns();
968966

969967
CHECK(update_rows >= old_rows);

be/src/olap/partial_update_info.cpp

+23-14
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include "olap/olap_common.h"
2525
#include "olap/rowset/rowset.h"
2626
#include "olap/rowset/rowset_writer_context.h"
27-
#include "olap/tablet_meta.h"
2827
#include "olap/tablet_schema.h"
2928
#include "olap/utils.h"
3029
#include "util/bitmap_value.h"
@@ -206,9 +205,21 @@ void PartialUpdateReadPlan::prepare_to_read(const RowLocation& row_location, siz
206205
// read columns by read plan
207206
// read_index: ori_pos-> block_idx
208207
Status PartialUpdateReadPlan::read_columns_by_plan(
209-
const TabletSchema& tablet_schema, const std::vector<uint32_t> cids_to_read,
208+
const TabletSchema& tablet_schema, std::vector<uint32_t> cids_to_read,
210209
const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset, vectorized::Block& block,
211-
std::map<uint32_t, uint32_t>* read_index, const signed char* __restrict skip_map) const {
210+
std::map<uint32_t, uint32_t>* read_index, bool force_read_old_delete_signs,
211+
const signed char* __restrict cur_delete_signs) const {
212+
if (force_read_old_delete_signs) {
213+
// always read delete sign column from historical data
214+
if (const vectorized::ColumnWithTypeAndName* old_delete_sign_column =
215+
block.try_get_by_name(DELETE_SIGN);
216+
old_delete_sign_column == nullptr) {
217+
auto del_col_cid = tablet_schema.field_index(DELETE_SIGN);
218+
cids_to_read.emplace_back(del_col_cid);
219+
block.swap(tablet_schema.create_block_by_cids(cids_to_read));
220+
}
221+
}
222+
212223
bool has_row_column = tablet_schema.has_row_store_for_all_columns();
213224
auto mutable_columns = block.mutate_columns();
214225
size_t read_idx = 0;
@@ -218,7 +229,7 @@ Status PartialUpdateReadPlan::read_columns_by_plan(
218229
CHECK(rowset_iter != rsid_to_rowset.end());
219230
std::vector<uint32_t> rids;
220231
for (auto [rid, pos] : mappings) {
221-
if (skip_map && skip_map[pos]) {
232+
if (cur_delete_signs && cur_delete_signs[pos]) {
222233
continue;
223234
}
224235
rids.emplace_back(rid);
@@ -263,17 +274,15 @@ Status PartialUpdateReadPlan::fill_missing_columns(
263274
// record real pos, key is input line num, value is old_block line num
264275
std::map<uint32_t, uint32_t> read_index;
265276
RETURN_IF_ERROR(read_columns_by_plan(tablet_schema, missing_cids, rsid_to_rowset,
266-
old_value_block, &read_index, nullptr));
267-
268-
const auto* delete_sign_column_data = BaseTablet::get_delete_sign_column_data(old_value_block);
277+
old_value_block, &read_index, true, nullptr));
269278

279+
const auto* old_delete_signs = BaseTablet::get_delete_sign_column_data(old_value_block);
280+
DCHECK(old_delete_signs != nullptr);
270281
// build default value columns
271282
auto default_value_block = old_value_block.clone_empty();
272-
if (has_default_or_nullable || delete_sign_column_data != nullptr) {
273-
RETURN_IF_ERROR(BaseTablet::generate_default_value_block(
274-
tablet_schema, missing_cids, rowset_ctx->partial_update_info->default_values,
275-
old_value_block, default_value_block));
276-
}
283+
RETURN_IF_ERROR(BaseTablet::generate_default_value_block(
284+
tablet_schema, missing_cids, rowset_ctx->partial_update_info->default_values,
285+
old_value_block, default_value_block));
277286
auto mutable_default_value_columns = default_value_block.mutate_columns();
278287

279288
// fill all missing value from mutable_old_columns, need to consider default value and null value
@@ -285,8 +294,8 @@ Status PartialUpdateReadPlan::fill_missing_columns(
285294
// read values from old rows for missing values in this occasion. So we should read the DELETE_SIGN column
286295
// to check if a row REALLY exists in the table.
287296
auto pos_in_old_block = read_index[idx + segment_start_pos];
288-
if (use_default_or_null_flag[idx] || (delete_sign_column_data != nullptr &&
289-
delete_sign_column_data[pos_in_old_block] != 0)) {
297+
if (use_default_or_null_flag[idx] ||
298+
(old_delete_signs != nullptr && old_delete_signs[pos_in_old_block] != 0)) {
290299
for (auto i = 0; i < missing_cids.size(); ++i) {
291300
// if the column has default value, fill it with default value
292301
// otherwise, if the column is nullable, fill it with null value

be/src/olap/partial_update_info.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,11 @@ class PartialUpdateReadPlan {
8080
public:
8181
void prepare_to_read(const RowLocation& row_location, size_t pos);
8282
Status read_columns_by_plan(const TabletSchema& tablet_schema,
83-
const std::vector<uint32_t> cids_to_read,
83+
std::vector<uint32_t> cids_to_read,
8484
const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset,
8585
vectorized::Block& block, std::map<uint32_t, uint32_t>* read_index,
86-
const signed char* __restrict skip_map = nullptr) const;
86+
bool force_read_old_delete_signs,
87+
const signed char* __restrict cur_delete_signs = nullptr) const;
8788
Status fill_missing_columns(RowsetWriterContext* rowset_ctx,
8889
const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset,
8990
const TabletSchema& tablet_schema, vectorized::Block& full_block,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql --
3+
1 1 1 1 1
4+
2 2 2 2 2
5+
3 3 3 3 3
6+
4 4 4 4 4
7+
5 5 5 5 5
8+
9+
-- !sql --
10+
1 1 1 987 987
11+
2 \N \N 987 987
12+
3 3 3 3 3
13+
4 -1 -1 987 987
14+
5 \N \N 987 987
15+

regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_merge_type.out

+16-16
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@
2828
7 7 7 7
2929
8 8 8 8
3030
10 \N 999 \N
31-
11 \N 888 \N
31+
11 \N \N \N
3232

3333
-- !sql_2_1 --
3434
0 0 0 0
3535
3 3 30 3
3636
7 7 7 7
3737
8 8 8 8
3838
10 \N 999 \N
39-
11 \N 888 \N
39+
11 \N \N \N
4040

4141
-- !sql_2_2 --
4242
0 0 0 0
@@ -90,7 +90,7 @@
9090
7 7 7 7
9191
8 8 8 8
9292
10 \N 999 \N
93-
11 \N 888 \N
93+
11 \N \N \N
9494

9595
-- !inspect --
9696
0 0 0 0 1 0
@@ -109,15 +109,15 @@
109109
8 8 8 8 1 0
110110
10 \N 999 \N 2 0
111111
11 \N 888 \N 2 1
112-
11 \N 888 \N 3 0
112+
11 \N \N \N 3 0
113113

114114
-- !sql_4_1 --
115115
0 0 0 0
116116
3 3 30 3
117117
7 7 7 7
118118
8 8 8 8
119119
10 \N 999 \N
120-
11 \N 888 \N
120+
11 \N \N \N
121121

122122
-- !inspect --
123123
0 0 0 0 1 0
@@ -138,7 +138,7 @@
138138
8 8 8 8 1 0
139139
10 \N 999 \N 2 0
140140
11 \N 888 \N 2 1
141-
11 \N 888 \N 3 0
141+
11 \N \N \N 3 0
142142

143143
-- !sql_4_2 --
144144
0 0 0 0
@@ -166,8 +166,8 @@
166166
8 8 8 8 1 0
167167
10 \N 999 \N 2 0
168168
11 \N 888 \N 2 1
169-
11 \N 888 \N 3 0
170-
11 \N 888 \N 5 1
169+
11 \N \N \N 3 0
170+
11 \N \N \N 5 1
171171

172172
-- !sql --
173173
0 0 0 0
@@ -198,15 +198,15 @@
198198
7 7 7 7
199199
8 8 8 8
200200
10 \N 999 \N
201-
11 \N 888 \N
201+
11 \N \N \N
202202

203203
-- !sql_2_1 --
204204
0 0 0 0
205205
3 3 30 3
206206
7 7 7 7
207207
8 8 8 8
208208
10 \N 999 \N
209-
11 \N 888 \N
209+
11 \N \N \N
210210

211211
-- !sql_2_2 --
212212
0 0 0 0
@@ -260,7 +260,7 @@
260260
7 7 7 7
261261
8 8 8 8
262262
10 \N 999 \N
263-
11 \N 888 \N
263+
11 \N \N \N
264264

265265
-- !inspect --
266266
0 0 0 0 1 0
@@ -279,15 +279,15 @@
279279
8 8 8 8 1 0
280280
10 \N 999 \N 2 0
281281
11 \N 888 \N 2 1
282-
11 \N 888 \N 3 0
282+
11 \N \N \N 3 0
283283

284284
-- !sql_4_1 --
285285
0 0 0 0
286286
3 3 30 3
287287
7 7 7 7
288288
8 8 8 8
289289
10 \N 999 \N
290-
11 \N 888 \N
290+
11 \N \N \N
291291

292292
-- !inspect --
293293
0 0 0 0 1 0
@@ -308,7 +308,7 @@
308308
8 8 8 8 1 0
309309
10 \N 999 \N 2 0
310310
11 \N 888 \N 2 1
311-
11 \N 888 \N 3 0
311+
11 \N \N \N 3 0
312312

313313
-- !sql_4_2 --
314314
0 0 0 0
@@ -336,6 +336,6 @@
336336
8 8 8 8 1 0
337337
10 \N 999 \N 2 0
338338
11 \N 888 \N 2 1
339-
11 \N 888 \N 3 0
340-
11 \N 888 \N 5 1
339+
11 \N \N \N 3 0
340+
11 \N \N \N 5 1
341341

regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
-- !partial_update_with_seq_hidden_columns --
1515
1 doris 200 123 1 2023-01-01 1 3 2023-01-01
16-
2 doris2 2600 223 1 2023-07-20 1 4 2023-07-20
16+
2 unknown 2600 \N 4321 2023-07-20 1 4 2023-07-20
1717
3 unknown 1500 \N 4321 2022-07-20 1 4 2022-07-20
1818

1919
-- !select_default --
@@ -30,6 +30,6 @@
3030

3131
-- !partial_update_with_seq_hidden_columns --
3232
1 doris 200 123 1 2023-01-01 1 3 2023-01-01
33-
2 doris2 2600 223 1 2023-07-20 1 4 2023-07-20
33+
2 unknown 2600 \N 4321 2023-07-20 1 4 2023-07-20
3434
3 unknown 1500 \N 4321 2022-07-20 1 4 2022-07-20
3535

regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
-- !partial_update_with_seq_test --
1414

1515
-- !partial_update_with_seq_test_hidden --
16-
1 doris 2300 2300 1 2021-05-19 1 4 2300
16+
1 unknown 2300 2300 4321 2021-05-19 1 4 2300
1717
2 doris2 3600 2400 1 2019-01-23 1 3 3600
1818
3 unknown 1500 2500 4321 2022-03-31 1 4 2500
1919

@@ -41,7 +41,7 @@
4141
-- !partial_update_with_seq_test --
4242

4343
-- !partial_update_with_seq_test_hidden --
44-
1 doris 2300 2300 1 2021-05-19 1 4 2300
44+
1 unknown 2300 2300 4321 2021-05-19 1 4 2300
4545
2 doris2 3600 2400 1 2019-01-23 1 3 3600
4646
3 unknown 1500 2500 4321 2022-03-31 1 4 2500
4747

0 commit comments

Comments
 (0)