diff --git a/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala b/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala index 8171a1787..760f8aaf2 100644 --- a/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala +++ b/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala @@ -12,982 +12,104 @@ class BiCompilerTest extends CompilerTest { , printFPlan = true ) - ignore("bi-01 from file: Posting summary") { + test("bi-01 from file: Posting summary") { val stages=compileFromFile("bi-01") } - test("bi-01: Posting summary") { - val stages = compile( - """// Q1. Posting summary - |/* - | :param { date: 20110721220000000 } - |*/ - |MATCH (message:Message) - |WHERE message.creationDate <= $date - |WITH count(message) AS totalMessageCountInt // this should be a subquery once Cypher supports it - |WITH toFloat(totalMessageCountInt) AS totalMessageCount - |MATCH (message:Message) - |WHERE message.creationDate <= $date - | AND message.content IS NOT NULL - |WITH - | totalMessageCount, - | message, - | message.creationDate/10000000000000 AS year - |WITH - | totalMessageCount, - | year, - | (message:Comment) AS isComment, - | CASE - | WHEN message.length < 40 THEN 0 - | WHEN message.length < 80 THEN 1 - | WHEN message.length < 160 THEN 2 - | ELSE 3 - | END AS lengthCategory, - | count(message) AS messageCount, - | floor(avg(message.length)) AS averageMessageLength, - | sum(message.length) AS sumMessageLength - |RETURN - | year, - | isComment, - | lengthCategory, - | messageCount, - | averageMessageLength, - | sumMessageLength, - | messageCount / totalMessageCount AS percentageOfMessages - |ORDER BY - | year DESC, - | isComment ASC, - | lengthCategory ASC - """.stripMargin) - } - ignore("bi-02 from file: Top tags for country, age, gender, time") { val stages=compileFromFile("bi-02") } - ignore("bi-02: Top tags for country, age, gender, time") { - val stages = compile( - """// Q2. Top tags for country, age, gender, time - |/* - | :param { - | date1: 20091231230000000, - | date2: 20101107230000000, - | country1: 'Ethiopia', - | country2: 'Belarus' - | } - |*/ - |MATCH - | (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person:Person) - | <-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(tag:Tag) - |WHERE message.creationDate >= $startDate - | AND message.creationDate <= $endDate - | AND (country.name = $country1 OR country.name = $country2) - |WITH - | country.name AS countryName, - | message.creationDate/100000000000%100 AS month, - | person.gender AS gender, - | floor((20130101 - person.birthday) / 10000 / 5.0) AS ageGroup, - | tag.name AS tagName, - | message - |WITH - | countryName, month, gender, ageGroup, tagName, count(message) AS messageCount - |WHERE messageCount > 100 - |RETURN - | countryName, - | month, - | gender, - | ageGroup, - | tagName, - | messageCount - |ORDER BY - | messageCount DESC, - | tagName ASC, - | ageGroup ASC, - | gender ASC, - | month ASC, - | countryName ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-03 from file: Tag evolution") { val stages=compileFromFile("bi-03") } - ignore("bi-03: Tag evolution") { - val stages = compile( - """// Q3. Tag evolution - |/* - | :param { - | year: 2010, - | month: 10 - | } - |*/ - |WITH - | $year AS year1, - | $month AS month1, - | $year + toInteger($month / 12.0) AS year2, - | $month % 12 + 1 AS month2 - |// year-month 1 - |MATCH (tag:Tag) - |OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag) - | WHERE message1.creationDate/10000000000000 = year1 - | AND message1.creationDate/100000000000%100 = month1 - |WITH year2, month2, tag, count(message1) AS countMonth1 - |// year-month 2 - |OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag) - | WHERE message2.creationDate/10000000000000 = year2 - | AND message2.creationDate/100000000000%100 = month2 - |WITH - | tag, - | countMonth1, - | count(message2) AS countMonth2 - |RETURN - | tag.name, - | countMonth1, - | countMonth2, - | abs(countMonth1-countMonth2) AS diff - |ORDER BY - | diff DESC, - | tag.name ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-04 from file: Popular topics in a country") { + test("bi-04 from file: Popular topics in a country") { val stages=compileFromFile("bi-04") } - test("bi-04: Popular topics in a country") { - val stages = compile( - """// Q4. Popular topics in a country - |/* - | :param { - | tagClass: 'MusicalArtist', - | country: 'Burma' - | } - |*/ - |MATCH - | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]-> - | (post:Post)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass}) - |RETURN - | forum.id, - | forum.title, - | forum.creationDate, - | person.id, - | count(DISTINCT post) AS postCount - |ORDER BY - | postCount DESC, - | forum.id ASC - |LIMIT 20 """.stripMargin) - } - ignore("bi-05 from file: Top posters in a country") { val stages=compileFromFile("bi-05") } - ignore("bi-05: Top posters in a country") { - val stages = compile( - """// Q5. Top posters in a country - |/* - | :param { country: 'Belarus' } - |*/ - |MATCH - | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_MEMBER]-(forum:Forum) - |WITH forum, count(person) AS numberOfMembers - |ORDER BY numberOfMembers DESC, forum.id ASC - |LIMIT 100 - |WITH collect(forum) AS popularForums - |UNWIND popularForums AS forum - |MATCH - | (forum)-[:HAS_MEMBER]->(person:Person) - |OPTIONAL MATCH - | (person)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(popularForum:Forum) - |WHERE popularForum IN popularForums - |RETURN - | person.id, - | person.firstName, - | person.lastName, - | person.creationDate, - | count(DISTINCT post) AS postCount - |ORDER BY - | postCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-06 from file: Most active Posters of a given Topic") { + test("bi-06 from file: Most active Posters of a given Topic") { val stages=compileFromFile("bi-06") } - test("bi-06: Most active Posters of a given Topic") { - val stages = compile( - """// Q6. Most active Posters of a given Topic - |/* - | :param { tag: 'Abbas_I_of_Persia' } - |*/ - |MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) - |OPTIONAL MATCH (:Person)-[like:LIKES]->(message) - |OPTIONAL MATCH (message)<-[:REPLY_OF]-(comment:Comment) - |WITH person, count(DISTINCT like) AS likeCount, count(DISTINCT comment) AS replyCount, count(DISTINCT message) AS messageCount - |RETURN - | person.id, - | messageCount, - | replyCount, - | likeCount, - | 1*messageCount + 2*replyCount + 10*likeCount AS score - |ORDER BY - | score DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-07 from file: Most authoritative users on a given topic") { + test("bi-07 from file: Most authoritative users on a given topic") { val stages=compileFromFile("bi-07") } - test("bi-07: Most authoritative users on a given topic") { - val stages = compile( - """// Q7. Most authoritative users on a given topic - |/* - | :param { tag: 'Arnold_Schwarzenegger' } - |*/ - |MATCH (tag:Tag {name: $tag}) - |MATCH (tag)<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person) - |MATCH (tag)<-[:HAS_TAG]-(message2:Message)-[:HAS_CREATOR]->(person1) - |OPTIONAL MATCH (message2)<-[:LIKES]-(person2:Person) - |OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message3:Message)<-[like:LIKES]-(p3:Person) - |RETURN - | person1.id, - | count(DISTINCT like) AS authorityScore - |ORDER BY - | authorityScore DESC, - | person1.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-08 from file: Related Topics") { + test("bi-08 from file: Related Topics") { val stages=compileFromFile("bi-08") } - test("bi-08: Related Topics") { - val stages = compile( - """// Q8. Related Topics - |/* - | :param { tag: 'Genghis_Khan' } - |*/ - |MATCH - | (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message), - | (message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag) - | // there is no need to filter for relatedTag.name != $tag, as the edge uniqueness constraint takes care of that - |WHERE NOT (comment)-[:HAS_TAG]->(tag) - |RETURN - | relatedTag.name, - | count(DISTINCT comment) AS count - |ORDER BY - | count DESC, - | relatedTag.name ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-09 from file: Forum with related Tags") { + test("bi-09 from file: Forum with related Tags") { val stages=compileFromFile("bi-09") } - test("bi-09: Forum with related Tags") { - val stages = compile( - """// Q9. Forum with related Tags - |/* - | :param { - | tagClass1: 'BaseballPlayer', - | tagClass2: 'ChristianBishop', - | threshold: 200 - | } - |*/ - |MATCH - | (forum:Forum)-[:HAS_MEMBER]->(person:Person) - |WITH forum, count(person) AS members - |WHERE members > $threshold - |MATCH - | (forum)-[:CONTAINER_OF]->(post1:Post)-[:HAS_TAG]-> - | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass1}) - |WITH forum, count(DISTINCT post1) AS count1 - |MATCH - | (forum)-[:CONTAINER_OF]->(post2:Post)-[:HAS_TAG]-> - | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass2}) - |WITH forum, count1, count(DISTINCT post2) AS count2 - |RETURN - | forum.id, - | count1, - | count2 - |ORDER BY - | abs(count2-count1) DESC, - | forum.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-10 from file: Central Person for a Tag") { val stages=compileFromFile("bi-10") } - ignore("bi-10: Central Person for a Tag") { - val stages = compile( - """// Q10. Central Person for a Tag - |/* - | :param { - | tag: 'John_Rhys-Davies', - | date: 20120122000000000 - | } - |*/ - |MATCH (tag:Tag {name: $tag}) - |// score - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) - |WITH tag, collect(person) AS interestedPersons - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) - | WHERE message.creationDate > $date - |WITH tag, interestedPersons + collect(person) AS persons - |UNWIND persons AS person - |// poor man's disjunct union (should be changed to UNION + post-union processing in the future) - |WITH DISTINCT tag, person - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) - |WITH - | tag, - | person, - | 100 * count(interest) AS score - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person) - | WHERE message.creationDate > $date - |WITH - | tag, - | person, - | score + count(message) AS score - |MATCH (person)-[:KNOWS]-(friend) - |WITH - | tag, - | person, - | score, - | friend - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(friend:Person) - |WITH - | tag, - | person, - | score, - | friend, - | 100 * count(interest) AS friendScore - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(friend) - | WHERE message.creationDate > $date - |WITH - | person, - | score, - | friend, - | friendScore + count(message) AS friendScore - |RETURN - | person.id, - | score, - | sum(friendScore) AS friendsScore - |ORDER BY - | score + friendsScore DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-11 from file: Unrelated replies") { val stages=compileFromFile("bi-11") } - ignore("bi-11: Unrelated replies") { - val stages = compile( - """// Q11. Unrelated replies - |/* - | :param { - | country: 'Germany', - | blacklist: ['also', 'Pope', 'that', 'James', 'Henry', 'one', 'Green'] - | } - |*/ - |WITH $blacklist AS blacklist - |MATCH - | (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_CREATOR]-(reply:Comment)-[:REPLY_OF]->(message:Message), - | (reply)-[:HAS_TAG]->(tag:Tag) - |WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) - | AND size([word IN blacklist WHERE reply.content CONTAINS word | word]) = 0 - |OPTIONAL MATCH - | (:Person)-[like:LIKES]->(reply) - |RETURN - | person.id, - | tag.name, - | count(DISTINCT like) AS countLikes, - | count(DISTINCT reply) AS countReplies - |ORDER BY - | countLikes DESC, - | person.id ASC, - | tag.name ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-12 from file: Trending Posts") { + test("bi-12 from file: Trending Posts") { val stages=compileFromFile("bi-12") } - test("bi-12: Trending Posts") { - val stages = compile( - """// Q12. Trending Posts - |/* - | :param { - | date: 20110721220000000, - | likeThreshold: 400 - | } - |*/ - |MATCH - | (message:Message)-[:HAS_CREATOR]->(creator:Person), - | (message)<-[like:LIKES]-(:Person) - |WHERE message.creationDate > $date - |WITH message, creator, count(like) AS likeCount - |WHERE likeCount > $likeThreshold - |RETURN - | message.id, - | message.creationDate, - | creator.firstName, - | creator.lastName, - | likeCount - |ORDER BY - | likeCount DESC, - | message.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-13 from file: Popular Tags per month in a country") { val stages=compileFromFile("bi-13") } - ignore("bi-13: Popular Tags per month in a country") { - val stages = compile( - """// Q13. Popular Tags per month in a country - |/* - | :param { country: 'Burma' } - |*/ - |MATCH (:Country {name: $country})<-[:IS_LOCATED_IN]-(message:Message) - |OPTIONAL MATCH (message)-[:HAS_TAG]->(tag:Tag) - |WITH - | message.creationDate/10000000000000 AS year, - | message.creationDate/100000000000%100 AS month, - | message, - | tag - |WITH year, month, count(message) AS popularity, tag - |ORDER BY popularity DESC, tag.name ASC - |WITH - | year, - | month, - | collect([tag.name, popularity]) AS popularTags - |WITH - | year, - | month, - | [popularTag IN popularTags WHERE popularTag[0] IS NOT NULL] AS popularTags - |RETURN - | year, - | month, - | [i IN range(0, (CASE size(popularTags) < 5 WHEN true THEN size(popularTags) ELSE 5 END)-1) - | | popularTags[i]] AS topPopularTags - |ORDER BY - | year DESC, - | month ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-14 from file: Top thread initiators") { + test("bi-14 from file: Top thread initiators") { val stages=compileFromFile("bi-14") } - test("bi-14: Top thread initiators") { - val stages = compile( - """// Q14. Top thread initiators - |/* - | :param { - | startDate: 20120531220000000, - | endDate: 20120630220000000 - | } - |*/ - |MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message) - |WHERE post.creationDate >= $startDate - | AND post.creationDate <= $endDate - | AND reply.creationDate >= $startDate - | AND reply.creationDate <= $endDate - |RETURN - | person.id, - | person.firstName, - | person.lastName, - | count(DISTINCT post) AS threadCount, - | count(DISTINCT reply) AS messageCount - |ORDER BY - | messageCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-15 from file: Social normals") { + test("bi-15 from file: Social normals") { val stages=compileFromFile("bi-15") } - test("bi-15: Social normals") { - val stages = compile( - """// Q15. Social normals - |/* - | :param { country: 'Burma' } - |*/ - |MATCH - | (country:Country {name: $country}) - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person1:Person) - |OPTIONAL MATCH - | // start a new MATCH as friend might live in the same City - | // and thus can reuse the IS_PART_OF edge - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), - | (person1)-[:KNOWS]-(friend1) - |WITH country, person1, count(friend1) AS friend1Count - |WITH country, avg(friend1Count) AS socialNormalFloat - |WITH country, floor(socialNormalFloat) AS socialNormal - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend2:Person), - | (person2)-[:KNOWS]-(friend2) - |WITH country, person2, count(friend2) AS friend2Count, socialNormal - |WHERE friend2Count = socialNormal - |RETURN - | person2.id, - | friend2Count AS count - |ORDER BY - | person2.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-16 from file: Experts in social circle") { val stages=compileFromFile("bi-16") } - ignore("bi-16: Experts in social circle") { - val stages = compile( - """// Q16. Experts in social circle - |/* - | :param { - | personId: 6597069777419, - | country: 'Pakistan', - | tagClass: 'MusicalArtist', - | minPathDistance: 3, - | maxPathDistance: 5 - | } - |*/ - |// This query will not work in a browser as is. I tried alternatives approaches, - |// e.g. enabling path of arbitrary lengths, saving the path to a variable p and - |// checking for `$minPathDistance <= length(p)`, but these could not be - |// evaluated due to the excessive amount of paths. - |// If you would like to test the query in the browser, replace the values of - |// $minPathDistance and $maxPathDistance to a constant. - |MATCH - | (:Person {id: $personId})-[:KNOWS*$minPathDistance..$maxPathDistance]- - | (person:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]-> - | (:Country {name: $country}), - | (person)<-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]-> - | (:TagClass {name: $tagClass}) - |MATCH (message)-[:HAS_TAG]->(tag:Tag) - |RETURN - | person.id, - | tag.name, - | count(message) AS messageCount - |ORDER BY - | messageCount DESC, - | tag.name ASC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-17 from file: Friend triangles") { + test("bi-17 from file: Friend triangles") { val stages=compileFromFile("bi-17") } - test("bi-17: Friend triangles") { - val stages = compile( - """// Q17. Friend triangles - |/* - | :param { country: 'Spain' } - |*/ - |MATCH (country:Country {name: $country}) - |MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (b:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (c:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (a)-[:KNOWS]-(b), (b)-[:KNOWS]-(c), (c)-[:KNOWS]-(a) - |WHERE a.id < b.id - | AND b.id < c.id - |RETURN count(*) - |// as a less elegant solution, count(a) also works - """.stripMargin) - } - - ignore("bi-18 from file: How many persons have a given number of posts") { + test("bi-18 from file: How many persons have a given number of posts") { val stages=compileFromFile("bi-18") } - test("bi-18: How many persons have a given number of posts") { - val stages = compile( - """// Q18. How many persons have a given number of posts - |/* - | :param { - | date: 20110722000000000, - | lengthThreshold: 20, - | languages: ['ar'] - | } - |*/ - |MATCH (person:Person) - |OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..]->(post:Post) - |WHERE message.content IS NOT NULL - | AND message.length < $lengthThreshold - | AND message.creationDate > $date - | AND post.language IN $languages - |WITH - | person, - | count(message) AS messageCount - |RETURN - | messageCount, - | count(person) AS personCount - |ORDER BY - | personCount DESC, - | messageCount DESC - """.stripMargin) - } - - ignore("bi-19 from file: Stranger's interaction") { + test("bi-19 from file: Stranger's interaction") { val stages=compileFromFile("bi-19") } - test("bi-19: Stranger's interaction") { - val stages = compile( - """// Q19. Stranger's interaction - |/* - | :param { - | date: 19890101, - | tagClass1: 'MusicalArtist', - | tagClass2: 'OfficeHolder' - | } - |*/ - |MATCH - | (:TagClass {name: $tagClass1})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- - | (forum1:Forum)-[:HAS_MEMBER]->(stranger:Person) - |WITH DISTINCT stranger - |MATCH - | (:TagClass {name: $tagClass2})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- - | (forum2:Forum)-[:HAS_MEMBER]->(stranger) - |WITH DISTINCT stranger - |MATCH - | (person:Person)<-[:HAS_CREATOR]-(:Message)-[:REPLY_OF]- - | (:Message)-[:HAS_CREATOR]->(stranger) - |WHERE person.birthday > $date - | AND person <> stranger - | AND NOT (person)-[:KNOWS]-(stranger) - |WITH person, stranger - |OPTIONAL MATCH - | (person)<-[:HAS_CREATOR]-(comment1:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(stranger) - |OPTIONAL MATCH - | (stranger)<-[:HAS_CREATOR]-(comment2:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person) - |WITH - | person, - | count(stranger) AS strangersCount, - | count(comment1) AS comment1Count, - | count(comment2) AS comment2Count - |RETURN - | person.id, - | strangersCount, - | comment1Count + comment2Count AS interactionCount - |ORDER BY - | interactionCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-20 from file: High-level topics") { + test("bi-20 from file: High-level topics") { val stages=compileFromFile("bi-20") } - test("bi-20: High-level topics") { - val stages = compile( - """// Q20. High-level topics - |/* - | :param { tagClasses: ['Writer', 'Single', 'Country'] } - |*/ - |UNWIND $tagClasses AS tagClassName - |MATCH - | (tagClass:TagClass {name: tagClassName})<-[:IS_SUBCLASS_OF*0..]- - | (:TagClass)<-[:HAS_TYPE]-(tag:Tag)<-[:HAS_TAG]-(message:Message) - |RETURN - | tagClass.name, - | count(message) AS postCount - |ORDER BY - | postCount DESC, - | tagClass.name ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-21 from file: Zombies in a country") { val stages=compileFromFile("bi-21") } - ignore("bi-21: Zombies in a country") { - val stages = compile( - """// Q21. Zombies in a country - |/* - | :param { - | country: 'Ethiopia', - | endDate: 20130101000000000 - | } - |*/ - |MATCH (country:Country {name: $country}) - |WITH - | country, - | $endDate/10000000000000 AS endDateYear, - | $endDate/100000000000%100 AS endDateMonth - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person) - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message) - |WHERE zombie.creationDate < $endDate - | AND message.creationDate < $endDate - |WITH - | country, - | zombie, - | endDateYear, - | endDateMonth, - | zombie.creationDate/10000000000000 AS zombieCreationYear, - | zombie.creationDate/100000000000%100 AS zombieCreationMonth, - | count(message) AS messageCount - |WITH - | country, - | zombie, - | 12 * (endDateYear - zombieCreationYear ) - | + (endDateMonth - zombieCreationMonth) - | + 1 AS months, - | messageCount - |WHERE messageCount / months < 1 - |WITH - | country, - | collect(zombie) AS zombies - |UNWIND zombies AS zombie - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person) - |WHERE likerZombie IN zombies - |WITH - | zombie, - | count(likerZombie) AS zombieLikeCount - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person) - |WHERE likerPerson.creationDate < $endDate - |WITH - | zombie, - | zombieLikeCount, - | count(likerPerson) AS totalLikeCount - |RETURN - | zombie.id, - | zombieLikeCount, - | totalLikeCount, - | CASE totalLikeCount - | WHEN 0 THEN 0 - | ELSE zombieLikeCount / toFloat(totalLikeCount) - | END AS zombieScore - |ORDER BY - | zombieScore DESC, - | zombie.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-22 from file: International dialog") { val stages=compileFromFile("bi-22") } - ignore("bi-22: International dialog") { - val stages = compile( - """// Q22. International dialog - |/* - | :param { - | country1: 'Mexico', - | country2: 'Indonesia' - | } - |*/ - |MATCH - | (country1:Country {name: $country1})<-[:IS_PART_OF]-(city1:City)<-[:IS_LOCATED_IN]-(person1:Person), - | (country2:Country {name: $country2})<-[:IS_PART_OF]-(city2:City)<-[:IS_LOCATED_IN]-(person2:Person) - |WITH person1, person2, city1, 0 AS score - |// subscore 1 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE c WHEN null THEN 0 ELSE 4 END) AS score - |// subscore 2 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score - |// subscore 3 - |OPTIONAL MATCH (person1)-[k:KNOWS]-(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE k WHEN null THEN 0 ELSE 15 END) AS score - |// subscore 4 - |OPTIONAL MATCH (person1)-[:LIKES]->(m:Message)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 10 END) AS score - |// subscore 5 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:LIKES]-(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score - |// preorder - |ORDER BY - | city1.name ASC, - | score DESC, - | person1.id ASC, - | person2.id ASC - |WITH - | city1, - | // using a list might be faster, but the browser query editor does not like it - | collect({score: score, person1: person1, person2: person2})[0] AS top - |RETURN - | top.person1.id, - | top.person2.id, - | city1.name, - | top.score - |ORDER BY - | top.score DESC, - | top.person1.id ASC, - | top.person2.id ASC - """.stripMargin) - } - - ignore("bi-23 from file: Holiday destinations") { + test("bi-23 from file: Holiday destinations") { val stages=compileFromFile("bi-23") } - test("bi-23: Holiday destinations") { - val stages = compile( - """// Q23. Holiday destinations - |/* - | :param { country: 'Egypt' } - |*/ - |MATCH - | (home:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (:Person)<-[:HAS_CREATOR]-(message:Message)-[:IS_LOCATED_IN]->(destination:Country) - |WHERE home <> destination - |WITH - | message, - | destination, - | message.creationDate/100000000000%100 AS month - |RETURN - | count(message) AS messageCount, - | destination.name, - | month - |ORDER BY - | messageCount DESC, - | destination.name ASC, - | month ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-24 from file: Messages by Topic and Continent") { + test("bi-24 from file: Messages by Topic and Continent") { val stages=compileFromFile("bi-24") } - test("bi-24: Messages by Topic and Continent") { - val stages = compile( - """// Q24. Messages by Topic and Continent - |/* - | :param { tagClass: 'Single' } - |*/ - |MATCH (:TagClass {name: $tagClass})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]-(message:Message) - |WITH DISTINCT message - |MATCH (message)-[:IS_LOCATED_IN]->(:Country)-[:IS_PART_OF]->(continent:Continent) - |OPTIONAL MATCH (message)<-[like:LIKES]-(:Person) - |WITH - | message, - | message.creationDate/10000000000000 AS year, - | message.creationDate/100000000000%100 AS month, - | like, - | continent - |RETURN - | count(DISTINCT message) AS messageCount, - | count(like) AS likeCount, - | year, - | month, - | continent.name - |ORDER BY - | year ASC, - | month ASC, - | continent.name DESC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-25 from file: Weighted paths") { val stages=compileFromFile("bi-25") } - ignore("bi-25: Weighted paths") { - val stages = compile( - """// Q25. Weighted paths - |/* - | :param { - | person1Id: 2199023264119, - | person2Id: 8796093028894, - | startDate: 20100601040000000, - | endDate: 20100701040000000 - | } - |*/ - |MATCH path = (p1:Person {id: $person1Id})-[:KNOWS*]-(p2:Person {id: $person2Id}) - |WITH p1, p2, path - |ORDER BY length(path) - |WITH p1, p2, collect(path)[0] AS path // select the shortest path - |UNWIND relationships(path) AS k - |WITH - | path, - | startNode(k) AS pA, - | endNode(k) AS pB, - | 0 AS weight - |// A to B - |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 - |OPTIONAL MATCH - | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pB), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*1.0 AS weight - |// A to B - |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 - |OPTIONAL MATCH - | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pB), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*0.5 AS weight - |// B to A - |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 - |OPTIONAL MATCH - | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pA), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*1.0 AS weight - |// B to A - |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 - |OPTIONAL MATCH - | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pA), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*0.5 AS weight - |RETURN - | [person IN nodes(path) | person.id] - |ORDER BY - | weight DESC - """.stripMargin) - } } diff --git a/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala b/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala index 0508ca406..cdf6fed31 100644 --- a/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala +++ b/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala @@ -26,7 +26,7 @@ case class CompilerTestConfig(querySuitePath: Option[String] = None def printAny: Boolean = printQuery || printCypher || printQPlan || printJPlan || printFPlan } -class CompilerTest extends FunSuite { +abstract class CompilerTest extends FunSuite { val config = CompilerTestConfig() val separatorLength = 77 diff --git a/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala b/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala index 5a70cd389..9268f0ab3 100644 --- a/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala +++ b/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala @@ -29,19 +29,19 @@ class DmlTest extends FunSuite { println(plan) } - ignore("should compile simple DELETE for a vertex") { + test("should compile simple DELETE for a vertex") { val cypher = CypherParser.parseString("MATCH (n:Person) DELETE n") val plan = CypherToQPlan.build(cypher) println(plan) } - ignore("should compile simple DELETE for an edge") { + test("should compile simple DELETE for an edge") { val cypher = CypherParser.parseString("MATCH (p1:Person)-[r:KNOWS]-(p2:Person) DELETE r") val plan = CypherToQPlan.build(cypher) println(plan) } - ignore("should compile simple DELETE for an entire pattern") { + test("should compile simple DELETE for an entire pattern") { val cypher = CypherParser.parseString("MATCH (p1:Person)-[r:KNOWS]-(p2:Person) DELETE p1, r, p2") val plan = CypherToQPlan.build(cypher) println(plan) diff --git a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala index 627b0696e..4568f63da 100644 --- a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala +++ b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala @@ -1,16 +1,18 @@ package ingraph.parse import hu.bme.mit.ire.datatypes.Tuple +import hu.bme.mit.ire.nodes.unary.aggregation._ import hu.bme.mit.ire.util.GenericMath import ingraph.expressionparser.FunctionLookup -import ingraph.model.expr.{FunctionInvocation, TupleIndexLiteralAttribute} +import ingraph.model.expr.{FunctionInvocation, Parameter, TupleIndexLiteralAttribute} +import ingraph.model.misc.FunctionCategory import org.apache.spark.sql.catalyst.expressions.{Add, And, BinaryArithmetic, BinaryComparison, BinaryOperator, CaseWhen, Divide, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Multiply, Not, Or, Pmod, Remainder, Subtract} import org.apache.spark.unsafe.types.UTF8String object ExpressionParser { def apply[T](exp: Expression): Tuple => T = { - val e = parse(exp) + val e: Tuple => Any = parse(exp) t => e(t).asInstanceOf[T] } @@ -64,10 +66,18 @@ object ExpressionParser { case invoc: FunctionInvocation => val children: Seq[Tuple => Any] = invoc.children.map(parse) children.length match { - case 0 => tuple => FunctionLookup.fun0(invoc.functor)() - case 1 => tuple => FunctionLookup.fun1(invoc.functor)(children.head(tuple)) - case 2 => tuple => FunctionLookup.fun2(invoc.functor)(children(0)(tuple), children(1)(tuple)) - case 3 => tuple => FunctionLookup.fun3(invoc.functor)(children(0)(tuple), children(1)(tuple), children(2)(tuple)) + case 0 => + val fun = FunctionLookup.fun0(invoc.functor) + tuple => fun() + case 1 => + val fun = FunctionLookup.fun1(invoc.functor) + tuple => fun(children.head(tuple)) + case 2 => + val fun = FunctionLookup.fun2(invoc.functor) + tuple => fun(children(0)(tuple), children(1)(tuple)) + case 3 => + val fun = FunctionLookup.fun3(invoc.functor) + tuple => fun(children(0)(tuple), children(1)(tuple), children(2)(tuple)) } case exp: CaseWhen => println(exp) @@ -81,27 +91,22 @@ object ExpressionParser { fallback(tuple) } caseFunction + case par: Parameter => tuple => "ot" } -// -// - -// def parseAggregate(exp: Expression, lookup: Map[String, Integer]): List[(String, () => StatefulAggregate)] = exp match { -// case exp: FunctionExpression if exp.getFunctor.getCategory == FunctionCategory.AGGREGATION => -// if (exp.getFunctor != COLLECT) { -// val variable = exp.getArguments.get(0).asInstanceOf[VariableExpression].getVariable -// val index = lookup(variable.fullName) -// List((exp.fullName, exp.getFunctor match { -// case AVG => () => new StatefulAverage(index) -// case COUNT => () => new NullAwareStatefulCount(index) -// case COUNT_ALL => () => new StatefulCount() -// case MAX => () => new StatefulMax(index) -// case MIN => () => new StatefulMin(index) -// case SUM => () => new StatefulSum(index) -// })) -// } else { -// val list = parseListExpression(exp.getArguments.get(0).asInstanceOf[ListExpression]) -// val indices = list.map(e => lookup(e.asInstanceOf[VariableExpression].getVariable.fullName)).map(_.toInt) -// List((exp.fullName, () => new StatefulCollect(indices))) -// } + def parseAggregate(exp: Expression): Option[(Int, () => StatefulAggregate)] = exp match { + case FunctionInvocation(functor, Seq(TupleIndexLiteralAttribute(index, _)), _) => + import ingraph.model.misc.Function._ + val factory = functor match { + case AVG => () => new StatefulAverage(index) + case COUNT => () => new NullAwareStatefulCount(index) + case COUNT_ALL => () => new StatefulCount() + case MAX => () => new StatefulMax(index) + case MIN => () => new StatefulMin(index) + case SUM => () => new StatefulSum(index) + } + Some((index, factory)) + //TODO: collect + case _ => None + } } diff --git a/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala b/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala index 85e4024f1..442147d41 100644 --- a/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala +++ b/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala @@ -7,6 +7,7 @@ import hu.bme.mit.ire.engine.RelationalEngine import hu.bme.mit.ire.messages.{ChangeSet, ReteMessage} import hu.bme.mit.ire.nodes.binary.{AntiJoinNode, JoinNode, LeftOuterJoinNode, UnionNode} import hu.bme.mit.ire.nodes.unary._ +import hu.bme.mit.ire.nodes.unary.aggregation.{AggregationNode, StatefulAggregate} import hu.bme.mit.ire.util.BufferMultimap import hu.bme.mit.ire.util.Utils.conversions._ import ingraph.model.expr._ @@ -53,7 +54,7 @@ object EngineFactory { case op: UnaryTNode => val node: (ReteMessage) => Unit = op match { case op: Production => production -// case op: Grouping => instantiateGrouping(op, expr) + case op: Grouping => grouping(op, expr) case op: SortAndTop => sortAndTop(op, expr) case op: Selection => selection(op, expr) case op: Projection => projection(op, expr) @@ -126,21 +127,20 @@ object EngineFactory { // unary nodes -// private def instantiateGrouping(op: Grouping, expr: ForwardConnection) = { -// val variableLookup = getSchema(op.child) -// val aggregates = op.fnode.jnode.projectList.flatMap( -// e => ExpressionParser.parseAggregate(e, variableLookup) -// ) -// val functions = () => aggregates.map( -// _._2() // GOOD LUCK UNDERSTANDING THIS -// ) -// val aggregationCriteria = op.fnode.jnode.aggregationCriteria.map(e => (e, ExpressionParser.parseValue(e, variableLookup))) -// val projectionVariableLookup: Map[String, Int] = -// aggregationCriteria.zipWithIndex.map( a => a._1._1.fullName -> a._2.asInstanceOf[Integer] ).toMap ++ -// aggregates.zipWithIndex.map( az => az._1._1 -> (az._2 + op.fnode.jnode.aggregationCriteria.size())) -// val projectionExpressions = op.internalSchema.map( e => ExpressionParser.parseValue(e, projectionVariableLookup)) -// newLocal(Props(new AggregationNode(expr.child, aggregationCriteria.map(_._2), functions, projectionExpressions))) -// } + private def grouping(op: Grouping, expr: ForwardConnection) = { + val aggregates = op.projectionTuple.map(e => ExpressionParser.parseAggregate(e)) + val factories = aggregates.flatten.map(_._2()).toVector + val nonAggregates = op.projectionTuple.filter(op.aggregationCriteria.contains) + var normalIndex = 0 + var aggregateIndex = op.projectionTuple.size - op.aggregationCriteria.size - 1 + val projections = aggregates.map { + case None => normalIndex += 1; normalIndex - 1 + case Some(_) => aggregateIndex += 1; aggregateIndex - 1 + } + val aggregationMask = op.aggregationCriteria.map(e => ExpressionParser[Any](e)).toVector + + newLocal(Props(new AggregationNode(expr.child, aggregationMask, () => factories, projections.toVector))) + } private def selection(op: Selection, expr: ForwardConnection) = { newLocal(Props(new SelectionNode(expr.child, ExpressionParser[Boolean](op.condition)))) diff --git a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala index f6d294da2..797b6ba5b 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala @@ -1,25 +1,120 @@ package ingraph.ire -import org.scalatest.FunSuite +class BiEngineTest extends EngineTest { -class BiEngineTest extends FunSuite { + override val queryDir: String = "ldbc-snb-bi" - ignore("test 1") { - val indexer = new Indexer() + test("bi-101: Count persons") { + run("""MATCH (p:Person)-[:IS_LOCATED_IN]->(c:country) + |RETURN p, count(c) AS cc + """.stripMargin) + } + + // compiles + ignore("bi-01 from file: Posting summary") { + runFromFile("bi-01") + } + + ignore("bi-02 from file: Top tags for country, age, gender, time") { + runFromFile("bi-02") + } + + ignore("bi-03 from file: Tag evolution") { + runFromFile("bi-03") + } + + test("bi-04 from file: Popular topics in a country") { + runFromFile("bi-04") + } + + ignore("bi-05 from file: Top posters in a country") { + runFromFile("bi-05") + } + + test("bi-06 from file: Most active Posters of a given Topic") { + runFromFile("bi-06") + } + + test("bi-07 from file: Most authoritative users on a given topic") { + runFromFile("bi-07") + } + + test("bi-08 from file: Related Topics") { + runFromFile("bi-08") + } + + // compiles + ignore("bi-09 from file: Forum with related Tags") { + runFromFile("bi-09") + } + + ignore("bi-10 from file: Central Person for a Tag") { + runFromFile("bi-10") + } - val readQuery = - """MATCH (country:Country {name: 'Austria'}) - |MATCH (a:Person)-[:isLocatedIn]->(:City)-[:isPartOf]->(country) - |MATCH (b:Person)-[:isLocatedIn]->(:City)-[:isPartOf]->(country) - |MATCH (c:Person)-[:isLocatedIn]->(:City)-[:isPartOf]->(country) - |MATCH (a)-[:knows]-(b), (b)-[:knows]-(c), (c)-[:knows]-(a) - |WHERE a.id < b.id - | AND b.id < c.id - |RETURN count(*) - """.stripMargin + ignore("bi-11 from file: Unrelated replies") { + runFromFile("bi-11") + } + + test("bi-12 from file: Trending Posts") { + runFromFile("bi-12") + } - val readAdapter = new IngraphIncrementalAdapter(readQuery, "read", indexer) - val result = readAdapter.result() + ignore("bi-13 from file: Popular Tags per month in a country") { + runFromFile("bi-13") } + // compiles + ignore("bi-14 from file: Top thread initiators") { + runFromFile("bi-14") + } + + // compiles + ignore("bi-15 from file: Social normals") { + runFromFile("bi-15") + } + + ignore("bi-16 from file: Experts in social circle") { + runFromFile("bi-16") + } + + test("bi-17 from file: Friend triangles") { + runFromFile("bi-17") + } + + // compiles + ignore("bi-18 from file: How many persons have a given number of posts") { + runFromFile("bi-18") + } + + // compiles + ignore("bi-19 from file: Stranger's interaction") { + runFromFile("bi-19") + } + + // compiles + ignore("bi-20 from file: High-level topics") { + runFromFile("bi-20") + } + + ignore("bi-21 from file: Zombies in a country") { + runFromFile("bi-21") + } + + ignore("bi-22 from file: International dialog") { + runFromFile("bi-22") + } + + test("bi-23 from file: Holiday destinations") { + runFromFile("bi-23") + } + + // compiles + ignore ("bi-24 from file: Messages by Topic and Continent") { + runFromFile("bi-24") + } + + ignore("bi-25 from file: Weighted paths") { + runFromFile("bi-25") + } } diff --git a/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala new file mode 100644 index 000000000..8066a0768 --- /dev/null +++ b/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala @@ -0,0 +1,21 @@ +package ingraph.ire + +import org.scalatest.FunSuite + +abstract class EngineTest extends FunSuite { + + val queryDir: String + + def run(readQuery: String): Unit = { + val indexer = new Indexer() + val readAdapter = new IngraphIncrementalAdapter(readQuery, "read", indexer) + val result = readAdapter.result() + } + + def runFromFile(fileBaseName: String): Unit = { + val source = scala.io.Source.fromFile(s"../queries/${queryDir}/${fileBaseName}.cypher") + val queryString = try source.getLines.mkString("\n") finally source.close() + run(queryString) + } + +} diff --git a/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala b/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala index 0a9501345..59e6a7ec7 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala @@ -41,4 +41,10 @@ class TrainbenchmarkBatchIntegrationTest extends FunSuite { val expected = (7 to 9).reverse.map(n => Vector(n.toLong)) assert(results == expected) } + + test("basic aggregations") { + val query = "MATCH (s: Switch) RETURN count(s)" + val results = TrainbenchmarkUtils.readModelAndGetResults(query, 1) + assert(results == List(Vector(40))) + } } diff --git a/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala b/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala index ef53a1e79..9a7741aa9 100755 --- a/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala +++ b/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala @@ -9,15 +9,15 @@ import scala.collection.immutable.VectorBuilder import scala.collection.mutable class AggregationNode(override val next: (ReteMessage) => Unit, - mask: Vector[Tuple => Any], functions: () => Vector[StatefulAggregate], - projection: Vector[Tuple => Any]) extends UnaryNode with SingleForwarder { + mask: Vector[Tuple => Any], factories: () => Vector[StatefulAggregate], + projection: Vector[Int]) extends UnaryNode with SingleForwarder { private val keyCount = mutable.Map[Tuple, Int]().withDefault(f => 0) - private val data = mutable.Map[Tuple, Vector[StatefulAggregate]]().withDefault(f => functions()) + private val data = mutable.Map[Tuple, Vector[StatefulAggregate]]().withDefault(f => factories()) override def onChangeSet(changeSet: ChangeSet): Unit = { val oldValues = mutable.Map[Tuple, (Tuple, Int)]() for ((key, tuples) <- changeSet.positive.groupBy(t => mask.map(m => m(t)))) { - val aggregators = data.getOrElseUpdate(key, functions()) + val aggregators = data.getOrElseUpdate(key, factories()) oldValues.getOrElseUpdate(key, (aggregators.map(_.value()), keyCount(key))) for (aggregator <- aggregators) @@ -25,7 +25,7 @@ class AggregationNode(override val next: (ReteMessage) => Unit, keyCount(key) += tuples.size } for ((key, tuples) <- changeSet.negative.groupBy(t => mask.map(m => m(t)))) { - val aggregators = data.getOrElseUpdate(key, functions()) + val aggregators = data.getOrElseUpdate(key, factories()) oldValues.getOrElseUpdate(key, (aggregators.map(_.value()), keyCount(key))) for (aggregator <- aggregators) aggregator.maintainNegative(tuples) @@ -44,8 +44,8 @@ class AggregationNode(override val next: (ReteMessage) => Unit, negative += key ++ oldValues } } - val positiveBag: TupleBag = positive.result().map((t: Tuple) => projection.map(_(t))) - val negativeBag: TupleBag = negative.result().map((t: Tuple) => projection.map(_(t))) + val positiveBag: TupleBag = positive.result().map(t => projection.map(t)) + val negativeBag: TupleBag = negative.result().map(t => projection.map(t)) forward(ChangeSet( positive = positiveBag, negative = negativeBag)) diff --git a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala index c5e28db82..e4586252e 100755 --- a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala +++ b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala @@ -30,7 +30,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "count with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3, 0), - () => Vector(new StatefulCount()), functionMask(0, 1, 2)))) // sex and the city + () => Vector(new StatefulCount()), Vector(0, 1, 2)))) // sex and the city counter ! ChangeSet(positive = tupleBag(odin)) expectMsg(ChangeSet(positive = tupleBag(tuple("male", "Asgard", 1)))) counter ! ChangeSet(positive = tupleBag(thor)) @@ -54,7 +54,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "collect with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3, 0), - () => Vector(new StatefulCollect(Vector(2))), functionMask(0, 1, 2)))) // (sex, city): (weapon) + () => Vector(new StatefulCollect(Vector(2))), Vector(0, 1, 2)))) // (sex, city): (weapon) counter ! ChangeSet(positive = tupleBag(odin)) expectMsg(ChangeSet(positive = tupleBag(tuple("male", "Asgard", cypherList(Vector("Gungnir")))))) counter ! ChangeSet(positive = tupleBag(thor)) @@ -99,7 +99,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "sum with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulSum(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulSum(4)), Vector(0, 1)))) // sex, sum for height counter ! ChangeSet(positive = tupleBag(odin)) assertNextChangeSetWithTolerance(key = 1, positive = Some(1)) counter ! ChangeSet(positive = tupleBag(thor)) @@ -113,7 +113,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "average with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulAverage(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulAverage(4)), Vector(0, 1)))) // sex, sum for height counter ! ChangeSet(positive = tupleBag(odin)) assertNextChangeSetWithTolerance(key = 1, positive = Some(1)) counter ! ChangeSet(positive = tupleBag(thor)) @@ -135,7 +135,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im val echoActor = system.actorOf(TestActors.echoActorProps) val stddev = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulStandardDeviation(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulStandardDeviation(4)), Vector(0, 1)))) // sex, sum for height stddev ! ChangeSet(positive = tupleBag(t1)) assertNextChangeSetWithTolerance(key = 1, positive = Some(0)) stddev ! ChangeSet(positive = tupleBag(t2)) @@ -165,7 +165,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im val echoActor = system.actorOf(TestActors.echoActorProps) val stddev = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulStandardDeviationSample(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulStandardDeviationSample(4)), Vector(0, 1)))) // sex, sum for height stddev ! ChangeSet(positive = tupleBag(t1)) assertNextChangeSetWithTolerance(key = 1, positive = Some(0)) stddev ! ChangeSet(positive = tupleBag(t2)) diff --git a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala index 01e4509ca..3131a4254 100644 --- a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala +++ b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala @@ -23,7 +23,7 @@ class MaxNodeTest(_system: ActorSystem) extends TestKit(_system) with ImplicitSe ) val echoActor = system.actorOf(TestActors.echoActorProps) val max = system.actorOf(Props(new AggregationNode( - echoActor ! _, functionMask(0), () => Vector(new StatefulMax(1)), functionMask(0, 1)))) + echoActor ! _, functionMask(0), () => Vector(new StatefulMax(1)), Vector(0, 1)))) max ! changeSet expectMsg(ChangeSet( diff --git a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala index 5b90bcd65..a156c3526 100644 --- a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala +++ b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala @@ -23,7 +23,7 @@ class MinNodeTest(_system: ActorSystem) extends TestKit(_system) with ImplicitSe ) val echoActor = system.actorOf(TestActors.echoActorProps) val min = system.actorOf(Props( - new AggregationNode(echoActor ! _, functionMask(0), () => Vector(new StatefulMin(1)), functionMask(0, 1)))) + new AggregationNode(echoActor ! _, functionMask(0), () => Vector(new StatefulMin(1)), Vector(0, 1)))) min ! changeSet expectMsg(ChangeSet(