Skip to content

Commit 90e5b55

Browse files
committed
NO SUBCHUNK MATERIALIZATION: Temporary commit
Work in progress. Still reworking qana::RelationGraph to move away from query rewriting for materialized sub-chunks and instead inject sub-chunk constraints on the chunk and chunk overlap tables. The unit test still fails at qproc/testQueryAnaGeneral:246 and qproc/testQueryAnaGeneral:1296. More details can be found in the file a.txt, located in the home folder of the package.
1 parent 90cc74e commit 90e5b55

File tree

6 files changed

+117
-41
lines changed

6 files changed

+117
-41
lines changed

src/qana/RelationGraph.cc

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <limits>
3434
#include <memory>
3535
#include <stdexcept>
36+
#include <unordered_set>
3637

3738
// LSST headers
3839
#include "lsst/log/Log.h"
@@ -79,10 +80,12 @@ using query::QueryTemplate;
7980
using query::SelectStmt;
8081
using query::TableRef;
8182
using query::TableRefList;
83+
using query::ValueExpr;
8284
using query::ValueExprPtr;
8385
using query::ValueExprPtrVector;
8486
using query::ValueFactor;
8587
using query::ValueFactorPtr;
88+
using query::WhereClause;
8689

8790
std::ostream& operator<<(std::ostream& out, Vertex const& vertex) {
8891
out << "Vertex("
@@ -155,6 +158,13 @@ void Vertex::insert(Edge const& e) {
155158
}
156159
}
157160

161+
std::shared_ptr<CompPredicate> Vertex::makeSubChunkCompPredicate() const {
162+
auto const table = tr.hasAlias() ? tr.getAlias() : info->table;
163+
return std::make_shared<CompPredicate>(ValueExpr::newColumnExpr("", table, "", SUB_CHUNK_COLUMN),
164+
CompPredicate::EQUALS_OP,
165+
ValueExpr::newSimple(ValueFactor::newConstFactor(SUBCHUNK_TAG)));
166+
}
167+
158168
// ----------------------------------------------------------------
159169
// RelationGraph implementation
160170

@@ -1029,12 +1039,14 @@ void RelationGraph::rewrite(SelectStmtPtrVector& outputs, QueryMapping& mapping)
10291039
// Find directors for which overlap is required. At the same time, rewrite
10301040
// all table references as their corresponding chunk templates.
10311041
std::vector<Vertex*> overlapRefs;
1042+
LOGS(_log, LOG_LVL_INFO, "1: rewrite _vertices.size()=" << _vertices.size());
10321043
for (ListIter i = _vertices.begin(), e = _vertices.end(); i != e; ++i) {
10331044
i->rewriteAsChunkTemplate();
10341045
if (i->info->kind == TableInfo::DIRECTOR && i->overlap > 0.0) {
10351046
overlapRefs.push_back(&(*i));
10361047
}
10371048
}
1049+
LOGS(_log, LOG_LVL_INFO, "2: rewrite overlapRefs.size()=" << overlapRefs.size());
10381050
if (overlapRefs.empty()) {
10391051
// There is no need for sub-chunking, so leave it off for now.
10401052
//
@@ -1058,31 +1070,80 @@ void RelationGraph::rewrite(SelectStmtPtrVector& outputs, QueryMapping& mapping)
10581070
"references that require overlap");
10591071
}
10601072

1073+
// Add sub-chunk constraint for all director & match tables
1074+
// Collect full table names (or aliases) in a set to avoid duplicates in setting sub-chunk constraints
1075+
std::unordered_set<std::string> tableNames;
1076+
for (ListIter i = _vertices.begin(), e = _vertices.end(); i != e; ++i) {
1077+
std::string kindName;
1078+
switch (i->info->kind) {
1079+
case TableInfo::DIRECTOR:
1080+
kindName = "DIRECTOR";
1081+
break;
1082+
case TableInfo::CHILD:
1083+
kindName = "CHILD";
1084+
break;
1085+
case TableInfo::MATCH:
1086+
kindName = "MATCH";
1087+
break;
1088+
default:
1089+
kindName = "UNKNOWN";
1090+
break;
1091+
}
1092+
LOGS(_log, LOG_LVL_INFO,
1093+
"3: rewrite i->tr.getAlias()=" << i->tr.getAlias() << " i->info->table=" << i->info->table
1094+
<< " overlap=" << i->overlap << " kind=" << kindName);
1095+
if (i->info->kind == TableInfo::DIRECTOR || i->info->kind == TableInfo::MATCH) {
1096+
std::string tableName;
1097+
if (i->tr.hasAlias()) {
1098+
tableName = i->tr.getAlias();
1099+
} else {
1100+
tableName = i->info->database + "." + i->info->table;
1101+
}
1102+
if (tableNames.count(tableName) == 0) {
1103+
tableNames.insert(tableName);
1104+
auto predicate = i->makeSubChunkCompPredicate();
1105+
BoolFactor::Ptr bfactor = std::make_shared<BoolFactor>();
1106+
bfactor->_terms.push_back(predicate);
1107+
_query->getWhereClause(true).prependAndTerm(bfactor);
1108+
}
1109+
}
1110+
}
1111+
10611112
// Rewrite director table references not requiring overlap as their
10621113
// corresponding sub-chunk templates, and record the names of all
10631114
// sub-chunked tables.
10641115
for (ListIter i = _vertices.begin(), e = _vertices.end(); i != e; ++i) {
10651116
if (i->info->kind == TableInfo::DIRECTOR) {
10661117
if (i->overlap == 0.0) {
1067-
i->rewriteAsSubChunkTemplate();
1118+
i->rewriteAsChunkTemplate();
10681119
}
10691120
DbTable dbTable(i->info->database, i->info->table);
1070-
LOGS(_log, LOG_LVL_TRACE, "rewrite db=" << dbTable.db << " table=" << dbTable.table);
1121+
LOGS(_log, LOG_LVL_INFO,
1122+
"4: rewrite db=" << i->info->database << " table=" << i->info->table
1123+
<< " overlap=" << i->overlap);
10711124
mapping.insertSubChunkTable(dbTable);
10721125
}
10731126
}
10741127
unsigned n = static_cast<unsigned>(overlapRefs.size());
10751128
unsigned numPermutations = 1 << n;
1076-
// Each director requiring overlap must be rewritten as both a sub-chunk
1077-
// template and an overlap sub-chunk template. There are 2ⁿ different
1129+
// Each director requiring overlap must be rewritten as both a chunk
1130+
// template and chunk overlap template. There are 2ⁿ different
10781131
// template permutations for n directors requiring overlap; generate them
10791132
// all.
10801133
for (unsigned p = 0; p < numPermutations; ++p) {
10811134
for (unsigned i = 0; i < n; ++i) {
10821135
if ((p & (1 << i)) != 0) {
10831136
overlapRefs[i]->rewriteAsOverlapTemplate();
1137+
LOGS(_log, LOG_LVL_INFO,
1138+
"5.o: rewrite db=" << overlapRefs[i]->info->database
1139+
<< " table=" << overlapRefs[i]->info->table
1140+
<< " overlap=" << overlapRefs[i]->overlap);
10841141
} else {
1085-
overlapRefs[i]->rewriteAsSubChunkTemplate();
1142+
overlapRefs[i]->rewriteAsChunkTemplate();
1143+
LOGS(_log, LOG_LVL_INFO,
1144+
"5.c: rewrite db=" << overlapRefs[i]->info->database
1145+
<< " table=" << overlapRefs[i]->info->table
1146+
<< " overlap=" << overlapRefs[i]->overlap);
10861147
}
10871148
}
10881149
// Given the use of shared_ptr by the IR classes, we could shallow

src/qana/RelationGraph.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,7 @@
494494
namespace lsst::qserv {
495495
namespace query {
496496
class ColumnRef;
497+
class CompPredicate;
497498
class QueryContext;
498499
class SelectStmt;
499500
} // namespace query
@@ -577,19 +578,16 @@ struct Vertex {
577578
tr.setTable(info->getChunkTemplate());
578579
}
579580

580-
/// `rewriteAsSubChunkTemplate` rewrites `tr` to contain a sub-chunk
581-
/// specific name pattern.
582-
void rewriteAsSubChunkTemplate() {
583-
tr.setDb(info->getSubChunkDb());
584-
tr.setTable(info->getSubChunkTemplate());
585-
}
586-
587581
/// `rewriteAsOverlapTemplate` rewrites `tr` to contain an overlap
588582
/// sub-chunk specific name pattern.
589583
void rewriteAsOverlapTemplate() {
590-
tr.setDb(info->getSubChunkDb());
584+
tr.setDb(info->database);
591585
tr.setTable(info->getOverlapTemplate());
592586
}
587+
588+
/// `makeSubChunkCompPredicate` creates a shared pointer to a new
589+
/// comparison predicate for the sub-chunk column.
590+
std::shared_ptr<query::CompPredicate> makeSubChunkCompPredicate() const;
593591
};
594592

595593
std::ostream& operator<<(std::ostream& out, Vertex const& vertex);

src/qana/TableInfo.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
#include <vector>
7070

7171
// Qserv headers
72-
#include "global/constants.h" // for SUBCHUNKDB_PREFIX
72+
#include "global/constants.h"
7373

7474
// Forward declarations
7575
namespace lsst::qserv::query {
@@ -134,12 +134,8 @@ struct TableInfo {
134134
return false;
135135
}
136136

137-
std::string const getSubChunkDb() const { return SUBCHUNKDB_PREFIX + database + "_" + CHUNK_TAG; }
138137
std::string const getChunkTemplate() const { return table + "_" + CHUNK_TAG; }
139-
std::string const getSubChunkTemplate() const { return table + "_" + CHUNK_TAG + "_" + SUBCHUNK_TAG; }
140-
std::string const getOverlapTemplate() const {
141-
return table + "FullOverlap_" + CHUNK_TAG + "_" + SUBCHUNK_TAG;
142-
}
138+
std::string const getOverlapTemplate() const { return table + "FullOverlap_" + CHUNK_TAG; }
143139

144140
virtual void dump(std::ostream& os) const;
145141
};

src/qproc/testQueryAnaGeneral.cc

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -199,16 +199,20 @@ BOOST_AUTO_TEST_CASE(RestrictorNeighborCount) {
199199
"where qserv_areaspec_box(6,6,7,7) AND rFlux_PS<0.005 AND "
200200
"scisql_angSep(o1.ra_Test,o1.decl_Test,o2.ra_Test,o2.decl_Test) < 0.001;";
201201
std::string expected_100_subchunk_core =
202-
"SELECT count(*) AS `QS1_COUNT` FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
203-
"`o1`,`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2` "
204-
"WHERE scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 AND "
202+
"SELECT count(*) AS `QS1_COUNT` FROM `LSST`.`Object_100` AS "
203+
"`o1`,`LSST`.`Object_100` AS `o2` "
204+
"WHERE `o2`.`subChunkId`=%S\007S% "
205+
"AND `o1`.`subChunkId`=%S\007S% "
206+
"AND scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 AND "
205207
"scisql_s2PtInBox(`o2`.`ra_Test`,`o2`.`decl_Test`,6,6,7,7)=1 AND "
206208
"`o1`.`rFlux_PS`<0.005 AND "
207209
"scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.001";
208210
std::string expected_100_subchunk_overlap =
209-
"SELECT count(*) AS `QS1_COUNT` FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
210-
"`o1`,`Subchunks_LSST_100`.`ObjectFullOverlap_100_%S\007S%` AS `o2` "
211-
"WHERE scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 "
211+
"SELECT count(*) AS `QS1_COUNT` FROM `LSST`.`Object_100` AS "
212+
"`o1`,`LSST`.`ObjectFullOverlap_100` AS `o2` "
213+
"WHERE `o2`.`subChunkId`=%S\007S% "
214+
"AND `o1`.`subChunkId`=%S\007S% "
215+
"AND scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 "
212216
"AND scisql_s2PtInBox(`o2`.`ra_Test`,`o2`.`decl_Test`,6,6,7,7)=1 "
213217
"AND `o1`.`rFlux_PS`<0.005 AND "
214218
"scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.001";
@@ -253,9 +257,11 @@ BOOST_AUTO_TEST_CASE(Triple) {
253257
"0.024 > scisql_angSep(o1.ra_Test,o1.decl_Test,o2.ra_Test,o2.decl_Test) and "
254258
"Source.objectIdSourceTest=o2.objectIdObjTest;";
255259
std::string expected =
256-
"SELECT * FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
257-
"`o1`,`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2`,`LSST`.`Source_100` AS `LSST.Source` "
258-
"WHERE `o1`.`id`!=`o2`.`id` AND "
260+
"SELECT * FROM `LSST`.`Object_100` AS "
261+
"`o1`,`LSST`.`Object_100` AS `o2`,`LSST`.`Source_100` AS `LSST.Source` "
262+
"WHERE `o2`.`subChunkId`=%S\007S% "
263+
"AND `o1`.`subChunkId`=%S\007S% "
264+
"AND `o1`.`id`!=`o2`.`id` AND "
259265
"0.024>scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`) AND "
260266
"`LSST.Source`.`objectIdSourceTest`=`o2`.`objectIdObjTest`";
261267

@@ -509,9 +515,11 @@ BOOST_AUTO_TEST_CASE(ObjectSelfJoinDistance) {
509515
"scisql_angSep(o1.ra_Test,o1.decl_Test,o2.ra_Test,o2.decl_Test) < 0.02";
510516
std::string expected =
511517
"SELECT count(*) AS `QS1_COUNT` "
512-
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o1`,"
513-
"`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2` "
514-
"WHERE scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,5.5,5.5,6.1,6.1)=1 "
518+
"FROM `LSST`.`Object_100` AS `o1`,"
519+
"`LSST`.`Object_100` AS `o2` "
520+
"WHERE `o2`.`subChunkId`=%S\007S% "
521+
"AND `o1`.`subChunkId`=%S\007S% "
522+
"AND scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,5.5,5.5,6.1,6.1)=1 "
515523
"AND scisql_s2PtInBox(`o2`.`ra_Test`,`o2`.`decl_Test`,5.5,5.5,6.1,6.1)=1 "
516524
"AND scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.02";
517525
qsTest.sqlConfig =
@@ -951,9 +959,11 @@ BOOST_AUTO_TEST_CASE(FuncExprPred) {
951959
"(scisql_fluxToAbMag(o2.gFlux_PS)-scisql_fluxToAbMag(o2.rFlux_PS)) ) < 1;";
952960
expected =
953961
"SELECT `o1`.`objectId` AS `o1.objectId`,`o2`.`objectId` AS `objectId2` "
954-
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
955-
"`o1`,`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2` "
956-
"WHERE scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.00001 "
962+
"FROM `LSST`.`Object_100` AS "
963+
"`o1`,`LSST`.`Object_100` AS `o2` "
964+
"WHERE `o2`.`subChunkId`=%S\007S% "
965+
"AND `o1`.`subChunkId`=%S\007S% "
966+
"AND scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.00001 "
957967
"AND `o1`.`objectId`<>`o2`.`objectId` AND "
958968
"ABS((scisql_fluxToAbMag(`o1`.`gFlux_PS`)-scisql_fluxToAbMag(`o1`.`rFlux_PS`))-(scisql_"
959969
"fluxToAbMag(`o2`.`gFlux_PS`)-scisql_fluxToAbMag(`o2`.`rFlux_PS`)))<1";
@@ -1294,18 +1304,24 @@ BOOST_AUTO_TEST_CASE(Case01_1081) {
12941304
"WHERE closestToObj = 1 OR closestToObj is NULL;";
12951305
std::string expected_100_subchunk_core =
12961306
"SELECT count(*) AS `QS1_COUNT` "
1297-
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o` "
1307+
"FROM `LSST`.`Object_100` AS `o` "
12981308
"INNER JOIN `LSST`.`RefObjMatch_100` AS `o2t` ON `o`.`objectIdObjTest`=`o2t`.`objectId` "
1299-
"INNER JOIN `Subchunks_LSST_100`.`SimRefObject_100_%S\007S%` AS `t` ON "
1309+
"INNER JOIN `LSST`.`SimRefObject_100` AS `t` ON "
13001310
"`o2t`.`refObjectId`=`t`.`refObjectId` "
1301-
"WHERE `o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL";
1311+
"WHERE `t`.`subChunkId`=%S\007S% "
1312+
"AND `o2t`.`subChunkId`=%S\007S% "
1313+
"AND `o`.`subChunkId`=%S\007S% "
1314+
"AND (`o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL)";
13021315
std::string expected_100_subchunk_overlap =
13031316
"SELECT count(*) AS `QS1_COUNT` "
1304-
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o` "
1317+
"FROM `LSST`.`Object_100` AS `o` "
13051318
"INNER JOIN `LSST`.`RefObjMatch_100` AS `o2t` ON `o`.`objectIdObjTest`=`o2t`.`objectId` "
1306-
"INNER JOIN `Subchunks_LSST_100`.`SimRefObjectFullOverlap_100_%S\007S%` AS `t` ON "
1319+
"INNER JOIN `LSST`.`SimRefObjectFullOverlap_100` AS `t` ON "
13071320
"`o2t`.`refObjectId`=`t`.`refObjectId` "
1308-
"WHERE `o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL";
1321+
"WHERE `t`.`subChunkId`=%S\007S% "
1322+
"AND `o2t`.`subChunkId`=%S\007S% "
1323+
"AND `o`.`subChunkId`=%S\007S% "
1324+
"AND (`o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL)";
13091325
qsTest.sqlConfig =
13101326
SqlConfig(SqlConfig::MockDbTableColumns({{"LSST",
13111327
{{"Object", {"objectIdObjTest", "closestToObj"}},

src/query/SelectStmt.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,11 @@ void SelectStmt::setFromListAsTable(std::string const& t) {
169169
_fromList = std::make_shared<FromList>(tr);
170170
}
171171

172+
WhereClause& SelectStmt::getWhereClause(bool createIfMissing) {
173+
if (createIfMissing && !_whereClause) _whereClause = std::make_shared<WhereClause>();
174+
return *_whereClause;
175+
}
176+
172177
bool SelectStmt::operator==(const SelectStmt& rhs) const {
173178
return (util::ptrCompare<FromList>(_fromList, rhs._fromList) &&
174179
util::ptrCompare<SelectList>(_selectList, rhs._selectList) &&

src/query/SelectStmt.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class SelectStmt {
104104

105105
bool hasWhereClause() const { return static_cast<bool>(_whereClause); }
106106
WhereClause const& getWhereClause() const { return *_whereClause; }
107-
WhereClause& getWhereClause() { return *_whereClause; }
107+
WhereClause& getWhereClause(bool createIfMissing = false);
108108
void setWhereClause(std::shared_ptr<WhereClause> w) { _whereClause = w; }
109109

110110
/**

0 commit comments

Comments
 (0)