-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathStorageBlockLayout.proto
151 lines (127 loc) · 5.67 KB
/
StorageBlockLayout.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
syntax = "proto2";
package quickstep;
// Options for TupleStorageSubBlocks: names the sub-block implementation and
// leaves room for implementation-specific options declared as extensions.
message TupleStorageSubBlockDescription {
  // The TupleStorageSubBlock implementations that can be described.
  enum TupleStorageSubBlockType {
    BASIC_COLUMN_STORE = 0;
    COMPRESSED_PACKED_ROW_STORE = 1;
    COMPRESSED_COLUMN_STORE = 2;
    SPLIT_ROW_STORE = 3;
  }

  // Which TupleStorageSubBlock implementation this description is for.
  required TupleStorageSubBlockType sub_block_type = 1;

  // Extension numbering convention: the extensions belonging to a particular
  // TupleStorageSubBlock type should start at (sub_block_type + 1) * 32.
  //
  // NOTE(review): the extensions of this message declared in this file use
  // 64, 96 and 128-129, i.e. one band of 32 higher than the formula gives
  // for the current enum values. Presumably this is a holdover from an
  // earlier enum numbering -- confirm before allocating new extension
  // numbers. Numbers already in use identify fields on the wire and must
  // not be changed.
  extensions 32 to max;
}
// Scope for the TupleStorageSubBlockDescription extensions used by the basic
// column store. The message itself declares no fields of its own.
message BasicColumnStoreTupleStorageSubBlockDescription {
  extend TupleStorageSubBlockDescription {
    // ID of the attribute that the column store is sorted by. Leaving this
    // unset means the column store is unsorted.
    optional int32 sort_attribute_id = 64;
  }
}
// Scope for the TupleStorageSubBlockDescription extensions used by the
// compressed packed row store. The message itself declares no fields of its
// own.
message CompressedPackedRowStoreTupleStorageSubBlockDescription {
  extend TupleStorageSubBlockDescription {
    // A list of attribute IDs.
    // NOTE(review): presumably the attributes that should be compressed,
    // going by the field name -- confirm against the sub-block code.
    repeated int32 compressed_attribute_id = 96;
  }
}
// Scope for the TupleStorageSubBlockDescription extensions used by the
// compressed column store. The message itself declares no fields of its own.
message CompressedColumnStoreTupleStorageSubBlockDescription {
  extend TupleStorageSubBlockDescription {
    // Must be set whenever sub_block_type == COMPRESSED_COLUMN_STORE. It is
    // declared optional because protobuf does not allow an extension to be
    // required, so the schema itself cannot enforce this.
    optional int32 sort_attribute_id = 128;

    // A list of attribute IDs.
    // NOTE(review): presumably the attributes that should be compressed,
    // going by the field name -- confirm against the sub-block code.
    repeated int32 compressed_attribute_id = 129;
  }
}
// Options for IndexSubBlocks: names the index implementation, lists the
// indexed attributes, and leaves room for implementation-specific options
// declared as extensions.
message IndexSubBlockDescription {
  // The IndexSubBlock implementations that can be described.
  enum IndexSubBlockType {
    CSB_TREE = 0;
    SMA = 1;
    BLOOM_FILTER = 2;
    BITWEAVING_H = 3;
    BITWEAVING_V = 4;
  }

  // Which IndexSubBlock implementation this description is for.
  required IndexSubBlockType sub_block_type = 1;

  // IDs of the indexed attributes.
  repeated int32 indexed_attribute_ids = 2;

  // Extension numbering convention: the extensions belonging to a particular
  // IndexSubBlock type should start at (sub_block_type + 1) * 32. For
  // example, BLOOM_FILTER (2) starts at 96.
  extensions 32 to max;
}
// Scope for the IndexSubBlockDescription extensions used by the Bloom filter
// index. The message itself declares no fields of its own.
message BloomFilterIndexSubBlockDescription {
  extend IndexSubBlockDescription {
    // The defaults below were determined from empirical experiments with a
    // 2 MB block size. Together these values control how much false
    // positivity is expected from the Bloom filter.

    // Bloom filter size per attribute. Default: 10 KB.
    // NOTE(review): the default of 10000 matches the quoted 10 KB, so the
    // unit is presumably bytes -- confirm.
    optional fixed64 bloom_filter_size = 96 [default = 10000];

    // Number of hash functions used in the Bloom filter. Default: 20.
    optional int32 number_of_hashes = 97 [default = 20];

    // Number of elements expected to be stored. Default: 1 million.
    optional fixed64 projected_element_count = 98 [default = 1000000];
  }
}
// The full logical description of a block layout: one tuple store plus zero
// or more indexes.
message StorageBlockLayoutDescription {
  // Number of slots for the block.
  // NOTE(review): what a "slot" is (and its size) is not defined in this
  // file -- see the code that consumes this message.
  required uint64 num_slots = 1;

  // Description of the block's TupleStorageSubBlock.
  required TupleStorageSubBlockDescription tuple_store_description = 2;

  // Descriptions of the block's IndexSubBlocks; may be empty.
  repeated IndexSubBlockDescription index_description = 3;
}
// Binary-format header of a single StorageBlock. In memory, a StorageBlock
// is laid out in this order:
//   1. 4 bytes: an int giving the length of the StorageBlockHeader that
//      follows (as returned by StorageBlockHeader::ByteSize()).
//   2. Variable length (the int above): the binary-serialized
//      StorageBlockHeader.
//   3. Variable length (header.tuple_store_size): a TupleStorageSubBlock.
//   4. Zero or more IndexSubBlocks, with lengths taken from
//      header.index_size.
message StorageBlockHeader {
  // Logical layout of the block this header belongs to.
  required StorageBlockLayoutDescription layout = 1;

  // Sub-block sizes, in bytes.
  //
  // NOTE(chasseur): uint64 would almost always give a smaller, more
  // efficient encoding, but fixed64 is used so that the exact size of a
  // header is known before the sub-block sizes have been computed.
  required fixed64 tuple_store_size = 2;
  repeated fixed64 index_size = 3 [packed = true];

  // For each IndexSubBlock, whether it is in a consistent state, i.e. it
  // completely and accurately reflects the contents of the
  // TupleStorageSubBlock.
  repeated bool index_consistent = 4 [packed = true];
}
// Binary-format header holding the information common to compressed blocks.
message CompressedBlockInfo {
  // Compressed byte-length of each attribute. An attribute whose length
  // here differs from the byte-length of its type in the catalog is
  // compressed.
  repeated fixed64 attribute_size = 1 [packed = true];

  // Dictionary sizes, one entry per attribute. A positive value is the size
  // of that attribute's compression dictionary. Zero means the attribute is
  // either uncompressed, or is an integer-like attribute compressed by
  // truncating its values to a shorter byte length.
  repeated fixed64 dictionary_size = 2 [packed = true];

  // Size, in bits, of the null bitmap(s) for uncompressed attributes that
  // have null values. May be zero when no uncompressed attribute has any
  // null values.
  required fixed64 null_bitmap_bits = 3;

  // For each attribute, whether it is uncompressed and has null values,
  // i.e. whether it has a columnar null bitmap in a
  // CompressedColumnStoreTupleStorageSubBlock, or bits in the row-major
  // null bitmap of a CompressedPackedRowStoreTupleStorageSubBlock.
  repeated bool uncompressed_attribute_has_nulls = 4 [packed = true];
}