Skip to content

Commit 1895119

Browse files
author
Jim Avery
committed
Fixed a critical bug in the recommendation scores
1 parent 9ca72fe commit 1895119

File tree

5 files changed

+32
-60
lines changed

5 files changed

+32
-60
lines changed

PostgreSQL/initmovielens1mdatabase.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ create index itemrating_index on ratings(itemid);
1414

1515
set client_encoding = LATIN1;
1616

17-
COPY users(userid,gender,age,job,zipcode) from '/home/administrator/javery/recdb-postgresql/PostgreSQL/moviedata/MovieLens1M/users.dat' DELIMITERS ';';
18-
COPY items(itemid,name,genre) from '/home/administrator/javery/recdb-postgresql/PostgreSQL/moviedata/MovieLens1M/movies.dat' DELIMITERS ';';
19-
COPY ratings(userid,itemid,ratingval,ratingts) from '/home/administrator/javery/recdb-postgresql/PostgreSQL/moviedata/MovieLens1M/ratings.dat' DELIMITERS ';';
17+
COPY users(userid,gender,age,job,zipcode) from '/home/jim/RecDBnew/recdb-postgresql/PostgreSQL/moviedata/MovieLens1M/users.dat' DELIMITERS ';';
18+
COPY items(itemid,name,genre) from '/home/jim/RecDBnew/recdb-postgresql/PostgreSQL/moviedata/MovieLens1M/movies.dat' DELIMITERS ';';
19+
COPY ratings(userid,itemid,ratingval,ratingts) from '/home/jim/RecDBnew/recdb-postgresql/PostgreSQL/moviedata/MovieLens1M/ratings.dat' DELIMITERS ';';
2020

2121
CREATE TABLE recathonheavyusers (userid integer primary key);
2222
insert into recathonheavyusers select t.userid from (select userid,count(*) as rating from ratings group by userid order by rating desc) t limit 100;

PostgreSQL/src/backend/executor/execRecommend.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -477,12 +477,13 @@ ExecFilterRecommend(RecScanState *recnode,
477477
}
478478

479479
/* If we get here, then we found a user who will be actually
480-
* returned in the results. */
480+
* returned in the results. One quick reset here. */
481+
recnode->fullItemNum = 0;
481482
}
482483

483484
/* Mark the user ID and index. */
484485
attributes->userID = userID;
485-
recnode->userindex = recnode->userNum;
486+
recnode->userindex = userindex;
486487

487488
/* With the user ID determined, we need to investigate and see
488489
* if this is a new user. If so, attempt to create prediction
@@ -495,9 +496,13 @@ ExecFilterRecommend(RecScanState *recnode,
495496

496497
/* Now replace the item ID, if the user is valid. Otherwise,
497498
* leave the item ID as is, as it doesn't matter what it is. */
498-
itemindex = recnode->itemNum;
499499
if (recnode->validUser)
500-
itemID = recnode->itemList[itemindex];
500+
itemID = recnode->itemList[recnode->itemNum];
501+
while (recnode->fullItemList[recnode->fullItemNum] < itemID)
502+
recnode->fullItemNum++;
503+
itemindex = recnode->fullItemNum;
504+
if (recnode->fullItemList[itemindex] > itemID)
505+
elog(ERROR, "critical item mismatch in ExecRecommend");
501506

502507
/* Plug in the data, marking those columns full. We also need to
503508
* mark the rating column with something temporary. */
@@ -524,6 +529,7 @@ ExecFilterRecommend(RecScanState *recnode,
524529
recnode->userNum++;
525530
recnode->newUser = true;
526531
recnode->itemNum = 0;
532+
recnode->fullItemNum = 0;
527533
if (recnode->userNum >= recnode->totalUsers)
528534
recnode->finished = true;
529535
}
@@ -974,10 +980,6 @@ ExecRecScan(RecScanState *node)
974980
void
975981
ExecEndRecScan(RecScanState *node)
976982
{
977-
AttributeInfo* attributes;
978-
979-
attributes = (AttributeInfo*) node->attributes;
980-
981983
/* End the normal scan. */
982984
switch(nodeTag(node->subscan)) {
983985
case T_SeqScanState:
@@ -992,6 +994,8 @@ ExecEndRecScan(RecScanState *node)
992994
/* Now for extra stuff. */
993995
if (node->itemList)
994996
pfree(node->itemList);
997+
if (node->fullItemList)
998+
pfree(node->fullItemList);
995999
if (node->userFeatures)
9961000
pfree(node->userFeatures);
9971001
if (node->base_slot)

PostgreSQL/src/backend/executor/nodeRecjoin.c

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
* IDENTIFICATION
1313
* src/backend/executor/nodeRecjoin.c
1414
*
15+
* THIS CODE IS CURRENTLY NOT INCORPORATED IN RECDB.
16+
*
1517
*-------------------------------------------------------------------------
1618
*/
1719
/*
@@ -114,7 +116,7 @@ ExecRecJoin(RecJoinState *recjoin)
114116
* until we're done projecting out tuples from a join tuple.
115117
*/
116118
ResetExprContext(econtext);
117-
printf("RecJoin.\n");
119+
118120
/*
119121
* Ok, everything is setup for the join. We're going to get exactly one
120122
* tuple from the outer plan, because we just want to use its tupleDesc
@@ -131,23 +133,21 @@ printf("RecJoin.\n");
131133
int i, userID, innerItemID, natts;
132134
GenRating *findRating;
133135
bool minimalTuple = false;
134-
printf("Point 1.\n");
136+
135137
/*
136138
* If we need an outer tuple, we fetch one. This creates a few
137139
* structures that we need to effectively perform a RecJoin.
138140
* It works both for initializing and resuming the inner loop.
139141
*/
140142
if (recjoin->rj_NeedNewOuter) {
141143
GenRating *tempItem;
142-
printf("Point 2.\n");
143144

144145
outerTupleSlot = ExecProcNode(outerPlan);
145146
/* If this happens, we're out of users. */
146147
if (TupIsNull(outerTupleSlot)) {
147148
ENL1_printf("no outer tuple (out of users), ending join");
148149
return NULL;
149150
}
150-
printf("Point 3.\n");
151151

152152
/* Otherwise, we need to construct our hash table, since
153153
* we need info from the previous operator to do so. */
@@ -163,20 +163,17 @@ printf("Point 3.\n");
163163

164164
hashAdd(recjoin->itemTable,tempItem);
165165
}
166-
printf("Point 4.\n");
167166

168167
/* Then we'll do some other stuff to ensure the loop
169168
* runs correctly. */
170169
recjoin->rj_NeedNewOuter = false;
171170
ENL1_printf("rescanning inner plan");
172171
ExecReScan(innerPlan);
173172
}
174-
printf("Point 5.\n");
175173

176174
/* We construct a new tuple on the fly. */
177175
outerTupleSlot = MakeSingleTupleTableSlot(recnode->base_slot);
178176
outerTupleSlot->tts_isempty = false;
179-
printf("Point 6.\n");
180177

181178
/* Mark all slots as non-empty and zero. */
182179
natts = outerTupleSlot->tts_tupleDescriptor->natts;
@@ -186,7 +183,6 @@ printf("Point 6.\n");
186183
outerTupleSlot->tts_isnull[i] = false;
187184
outerTupleSlot->tts_nvalid++;
188185
}
189-
printf("Point 7.\n");
190186

191187
/*
192188
* try to get the next inner tuple.
@@ -202,17 +198,16 @@ printf("Point 7.\n");
202198
if (recjoin->innerTupleAtt < 0) {
203199
for (i = 0; i < innerTupleSlot->tts_tupleDescriptor->natts; i++) {
204200
char* col_name = innerTupleSlot->tts_tupleDescriptor->attrs[i]->attname.data;
205-
//printf("%s\n",col_name);
201+
206202
if (strcmp(col_name,attributes->itemkey) == 0) {
207203
recjoin->innerTupleAtt = i;
208204
break;
209205
}
210206
}
211207
}
212-
printf("Point 7.5.\n");
208+
213209
if (!TupIsNull(innerTupleSlot) && innerTupleSlot->tts_mintuple)
214210
minimalTuple = true;
215-
printf("Point 8.\n");
216211

217212
/* If there's no inner tuple, then we'll make a note to reset the
218213
* inner loop and get a new outer tuple. */
@@ -222,7 +217,6 @@ printf("Point 8.\n");
222217
recjoin->rj_NeedNewOuter = true;
223218
continue;
224219
}
225-
printf("Point 9.\n");
226220

227221
/*
228222
* We now have an inner tuple and a shell of an outer tuple. We need
@@ -247,18 +241,12 @@ printf("Point 9.\n");
247241
FreeTupleDesc(tempdesc);*/
248242
}
249243
userID = attributes->userID;
250-
printf("Point 10.\n");
251244

252245
/*
253246
* Is this item ID one of the ones we need to predict a rating for?
254247
*/
255-
if (!recjoin->itemTable) printf("No table.\n");
256-
printf("innerItemID = %d\n",innerItemID);
257-
printf("userID = %d\n",userID);
258-
if (innerItemID < 0) {printf("Skipping item.\n");continue;}
259248
findRating = hashFind(recjoin->itemTable,innerItemID);
260249
if (!findRating) continue;
261-
printf("Point 11.\n");
262250

263251
/*
264252
* We're ok to construct a tuple at this point.
@@ -269,7 +257,6 @@ printf("Point 11.\n");
269257
outerTupleSlot->tts_isnull[recnode->itematt] = false;
270258

271259
econtext->ecxt_outertuple = outerTupleSlot;
272-
printf("Point 12.\n");
273260

274261
/*
275262
* at this point we have a new pair of inner and outer tuples so we
@@ -284,23 +271,20 @@ printf("Point 12.\n");
284271
if (ExecQual(joinqual, econtext, false))
285272
{
286273
node->nl_MatchedOuter = true;
287-
printf("Point 13.\n");
288274

289275
/* In an antijoin, we never return a matched tuple */
290276
if (node->js.jointype == JOIN_ANTI)
291277
{
292278
node->nl_NeedNewOuter = true;
293279
continue; /* return to top of loop */
294280
}
295-
printf("Point 14.\n");
296281

297282
/*
298283
* In a semijoin, we'll consider returning the first match, but
299284
* after that we're done with this outer tuple.
300285
*/
301286
if (node->js.jointype == JOIN_SEMI)
302287
node->nl_NeedNewOuter = true;
303-
printf("Point 15.\n");
304288

305289
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
306290
{
@@ -310,7 +294,6 @@ printf("Point 15.\n");
310294
*/
311295
TupleTableSlot *result;
312296
ExprDoneCond isDone;
313-
printf("Point 16.\n");
314297

315298
/*
316299
* The tuples match our qualifications. We now apply
@@ -319,12 +302,10 @@ printf("Point 16.\n");
319302
*/
320303
int itemindex = binarySearch(recnode->fullItemList, innerItemID, 0, recnode->fullTotalItems);
321304
applyRecScore(recnode, outerTupleSlot, innerItemID, itemindex);
322-
printf("Point 17.\n");
323305

324306
ENL1_printf("qualification succeeded, projecting tuple");
325307

326308
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
327-
printf("Point 18.\n");
328309

329310
if (isDone != ExprEndResult)
330311
{
@@ -343,7 +324,6 @@ printf("Point 18.\n");
343324
* Tuple fails qual, so free per-tuple memory and try again.
344325
*/
345326
ResetExprContext(econtext);
346-
printf("Point 19.\n");
347327

348328
ENL1_printf("qualification failed, looping");
349329
}
@@ -356,8 +336,6 @@ printf("Point 19.\n");
356336
RecJoinState *
357337
ExecInitRecJoin(RecJoin *node, EState *estate, int eflags)
358338
{
359-
int i;
360-
GenRating *tempItem;
361339
RecJoinState *rjstate;
362340

363341
rjstate = makeNode(RecJoinState);

PostgreSQL/src/backend/parser/parse_rec.c

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ static void modifyColumnRef(ColumnRef *attribute, char *recname, char *viewname)
3535
static void modifyFrom(SelectStmt *stmt, RecommendInfo *recInfo);
3636
static void filterfirst(Node *whereExpr, RecommendInfo *recInfo);
3737
static bool filterfirstrecurse(Node *whereExpr, RecommendInfo *recInfo);
38-
static void applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo);
39-
static RangeVar* locateJoinTable(Node* recExpr, List *fromClause, RangeVar* eventtable, char* key);
38+
//static void applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo);
39+
//static RangeVar* locateJoinTable(Node* recExpr, List *fromClause, RangeVar* eventtable, char* key);
4040
static bool tableMatch(RangeVar* table, char* tablename);
4141

4242
/*
@@ -687,8 +687,9 @@ filterfirstrecurse(Node *whereExpr, RecommendInfo *recInfo) {
687687
/*
688688
* applyRecJoin -
689689
* A function to determine if we need to employ a RecJoin.
690+
* CURRENTLY NOT IN USE.
690691
*/
691-
static void
692+
/*static void
692693
applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo) {
693694
RangeVar *partnerTable;
694695
AttributeInfo *attributes = recInfo->attributes;
@@ -705,19 +706,10 @@ applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo) {
705706
if (!partnerTable)
706707
partnerTable = locateJoinTable(whereClause, fromClause,
707708
recInfo->recommender, attributes->userkey);
708-
// else
709-
// ereport(ERROR,
710-
// (errcode(ERRCODE_SYNTAX_ERROR),
711-
// errmsg("found item ID match")));
712-
713709
714710
// If we found no such table, give up.
715711
if (!partnerTable)
716712
return;
717-
// else
718-
// ereport(ERROR,
719-
// (errcode(ERRCODE_SYNTAX_ERROR),
720-
// errmsg("found user ID match")));
721713
722714
// Otherwise, we found an appropriate table. Make a note.
723715
recInfo->opType = OP_JOIN;
@@ -726,23 +718,24 @@ applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo) {
726718
partnerInfo = makeNode(RecommendInfo);
727719
partnerInfo->opType = OP_JOINPARTNER;
728720
partnerTable->recommender = partnerInfo;
729-
}
721+
}*/
730722

731723

732724
/*
733725
* locateJoinTable -
734726
* A function to search through the WHERE clause and see if we are
735727
* joining our recommender with some other table, either by item ID
736728
* or user ID. We give preference to item ID.
729+
* CURRENTLY NOT IN USE.
737730
*/
738-
static RangeVar*
731+
/*static RangeVar*
739732
locateJoinTable(Node* recExpr, List *fromClause, RangeVar* eventtable, char* key) {
740733
A_Expr *recAExpr;
741734
742735
if (!recExpr)
743736
return NULL;
744737
745-
/* Turns out this isn't necessarily an A_Expr. */
738+
// Turns out this isn't necessarily an A_Expr.
746739
if (nodeTag(recExpr) != T_A_Expr)
747740
return NULL;
748741
@@ -835,7 +828,7 @@ locateJoinTable(Node* recExpr, List *fromClause, RangeVar* eventtable, char* key
835828
836829
// All other kinds fail, at least for now.
837830
return NULL;
838-
}
831+
}*/
839832

840833
/*
841834
* tableMatch -

PostgreSQL/src/backend/utils/misc/recathon.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2961,7 +2961,7 @@ generateItemCosModel(RecScanState *recnode) {
29612961
PlanState *simplanstate;
29622962
TupleTableSlot *simslot;
29632963
MemoryContext simcontext;
2964-
printf("Starting model.\n");
2964+
29652965
attributes = (AttributeInfo*) recnode->attributes;
29662966
eventtable = attributes->eventtable;
29672967
userkey = attributes->userkey;
@@ -3026,7 +3026,7 @@ printf("Starting model.\n");
30263026

30273027
/* Query cleanup. */
30283028
recathon_queryEnd(simqueryDesc, simcontext);
3029-
printf("Calculating similarities.\n");
3029+
30303030
/* Now we do the similarity calculations. Note that we
30313031
* don't include duplicate entries, to save time and space.
30323032
* The first item ALWAYS has a lower value than the second. */
@@ -3069,8 +3069,6 @@ printf("Calculating similarities.\n");
30693069
recnode->fullTotalItems = numItems;
30703070
recnode->fullItemList = itemIDs;
30713071
recnode->itemCFmodel = itemmodel;
3072-
3073-
printf("Model complete.\n");
30743072
}
30753073

30763074
/* ----------------------------------------------------------------
@@ -4026,7 +4024,6 @@ prepUserForRating(RecScanState *recstate, int userID) {
40264024
applyItemSimGenerate(recstate);
40274025
else
40284026
applyItemSim(recstate, attributes->recModelName);
4029-
40304027
break;
40314028
case userCosCF:
40324029
case userPearCF:

0 commit comments

Comments
 (0)