From e13500b429aa4678163ffaf6ef324fd2327baf57 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Fri, 26 Jan 2018 21:47:40 +0100 Subject: [PATCH 01/11] Add tests (broken) --- .../test/scala/ingraph/ire/BiEngineTest.scala | 884 +++++++++++++++++- .../test/scala/ingraph/ire/EngineTest.scala | 13 + 2 files changed, 884 insertions(+), 13 deletions(-) create mode 100644 ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala diff --git a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala index f6d294da2..abf37ea13 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala @@ -1,25 +1,883 @@ package ingraph.ire -import org.scalatest.FunSuite +class BiEngineTest extends EngineTest { -class BiEngineTest extends FunSuite { + test("bi-01: Posting summary") { + val stages = run( + """// Q1. Posting summary + |/* + | :param { date: 20110721220000000 } + |*/ + |MATCH (message:Message) + |WHERE message.creationDate <= $date + |WITH toFloat(count(message)) AS totalMessageCount // this should be a subquery once Cypher supports it + |MATCH (message:Message) + |WHERE message.creationDate <= $date + | AND message.content IS NOT NULL + |WITH + | totalMessageCount, + | message, + | message.creationDate/10000000000000 AS year + |WITH + | totalMessageCount, + | year, + | (message:Comment) AS isComment, + | CASE + | WHEN message.length < 40 THEN 0 + | WHEN message.length < 80 THEN 1 + | WHEN message.length < 160 THEN 2 + | ELSE 3 + | END AS lengthCategory, + | count(message) AS messageCount, + | floor(avg(message.length)) AS averageMessageLength, + | sum(message.length) AS sumMessageLength + |RETURN + | year, + | isComment, + | lengthCategory, + | messageCount, + | averageMessageLength, + | sumMessageLength, + | messageCount / totalMessageCount AS percentageOfMessages + |ORDER BY + | year DESC, + | isComment ASC, + | lengthCategory ASC + """.stripMargin) + } + + test("bi-02: Top tags for country, age, gender, time") { + val stages = run( + """// Q2. Top tags for country, age, gender, time + |/* + | :param { + | date1: 20091231230000000, + | date2: 20101107230000000, + | country1: 'Ethiopia', + | country2: 'Belarus' + | } + |*/ + |MATCH + | (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person:Person) + | <-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(tag:Tag) + |WHERE message.creationDate >= $startDate + | AND message.creationDate <= $endDate + | AND (country.name = $country1 OR country.name = $country2) + |WITH + | country.name AS countryName, + | message.creationDate/100000000000%100 AS month, + | person.gender AS gender, + | floor((20130101 - person.birthday) / 10000 / 5.0) AS ageGroup, + | tag.name AS tagName, + | message + |WITH + | countryName, month, gender, ageGroup, tagName, count(message) AS messageCount + |WHERE messageCount > 100 + |RETURN + | countryName, + | month, + | gender, + | ageGroup, + | tagName, + | messageCount + |ORDER BY + | messageCount DESC, + | tagName ASC, + | ageGroup ASC, + | gender ASC, + | month ASC, + | countryName ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-03: Tag evolution") { + val stages = run( + """// Q3. Tag evolution + |/* + | :param { + | year: 2010, + | month: 10 + | } + |*/ + |WITH + | $year AS year1, + | $month AS month1, + | $year + toInteger($month / 12.0) AS year2, + | $month % 12 + 1 AS month2 + |// year-month 1 + |MATCH (tag:Tag) + |OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag) + | WHERE message1.creationDate/10000000000000 = year1 + | AND message1.creationDate/100000000000%100 = month1 + |WITH year2, month2, tag, count(message1) AS countMonth1 + |// year-month 2 + |OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag) + | WHERE message2.creationDate/10000000000000 = year2 + | AND message2.creationDate/100000000000%100 = month2 + |WITH + | tag, + | countMonth1, + | count(message2) AS countMonth2 + |RETURN + | tag.name, + | countMonth1, + | countMonth2, + | abs(countMonth1-countMonth2) AS diff + |ORDER BY + | diff DESC, + | tag.name ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-04: Popular topics in a country") { + val stages = run( + """// Q4. Popular topics in a country + |/* + | :param { + | tagClass: 'MusicalArtist', + | country: 'Burma' + | } + |*/ + |MATCH + | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + | (person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]-> + | (post:Post)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass}) + |RETURN + | forum.id, + | forum.title, + | forum.creationDate, + | person.id, + | count(DISTINCT post) AS postCount + |ORDER BY + | postCount DESC, + | forum.id ASC + |LIMIT 20 """.stripMargin) + } + + test("bi-05: Top posters in a country") { + val stages = run( + """// Q5. Top posters in a country + |/* + | :param { country: 'Belarus' } + |*/ + |MATCH + | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + | (person:Person)<-[:HAS_MEMBER]-(forum:Forum) + |WITH forum, count(person) AS numberOfMembers + |ORDER BY numberOfMembers DESC, forum.id ASC + |LIMIT 100 + |WITH collect(forum) AS popularForums + |UNWIND popularForums AS forum + |MATCH + | (forum)-[:HAS_MEMBER]->(person:Person) + |OPTIONAL MATCH + | (person)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(popularForum:Forum) + |WHERE popularForum IN popularForums + |RETURN + | person.id, + | person.firstName, + | person.lastName, + | person.creationDate, + | count(DISTINCT post) AS postCount + |ORDER BY + | postCount DESC, + | person.id ASC + |LIMIT 100 + """.stripMargin) + } - ignore("test 1") { - val indexer = new Indexer() + test("bi-06: Most active Posters of a given Topic") { + val stages = run( + """// Q6. Most active Posters of a given Topic + |/* + | :param { tag: 'Abbas_I_of_Persia' } + |*/ + |MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) + |OPTIONAL MATCH (:Person)-[like:LIKES]->(message) + |OPTIONAL MATCH (message)<-[:REPLY_OF]-(comment:Comment) + |WITH person, count(DISTINCT like) AS likeCount, count(DISTINCT comment) AS replyCount, count(DISTINCT message) AS messageCount + |RETURN + | person.id, + | messageCount, + | replyCount, + | likeCount, + | 1*messageCount + 2*replyCount + 10*likeCount AS score + |ORDER BY + | score DESC, + | person.id ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-07: Most authoritative users on a given topic") { + val stages = run( + """// Q7. Most authoritative users on a given topic + |/* + | :param { tag: 'Arnold_Schwarzenegger' } + |*/ + |MATCH (tag:Tag {name: $tag}) + |MATCH (tag)<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person) + |MATCH (tag)<-[:HAS_TAG]-(message2:Message)-[:HAS_CREATOR]->(person1) + |OPTIONAL MATCH (message2)<-[:LIKES]-(person2:Person) + |OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message3:Message)<-[like:LIKES]-(p3:Person) + |RETURN + | person1.id, + | count(DISTINCT like) AS authorityScore + |ORDER BY + | authorityScore DESC, + | person1.id ASC + |LIMIT 100 + """.stripMargin) + } - val readQuery = - """MATCH (country:Country {name: 'Austria'}) - |MATCH (a:Person)-[:isLocatedIn]->(:City)-[:isPartOf]->(country) - |MATCH (b:Person)-[:isLocatedIn]->(:City)-[:isPartOf]->(country) - |MATCH (c:Person)-[:isLocatedIn]->(:City)-[:isPartOf]->(country) - |MATCH (a)-[:knows]-(b), (b)-[:knows]-(c), (c)-[:knows]-(a) + test("bi-08: Related Topics") { + val stages = run( + """// Q8. Related Topics + |/* + | :param { tag: 'Genghis_Khan' } + |*/ + |MATCH + | (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message), + | (message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag) + | // there is no need to filter for relatedTag.name != $tag, as the edge uniqueness constraint takes care of that + |WHERE NOT (comment)-[:HAS_TAG]->(tag) + |RETURN + | relatedTag.name, + | count(DISTINCT comment) AS count + |ORDER BY + | count DESC, + | relatedTag.name ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-09: Forum with related Tags") { + val stages = run( + """// Q9. Forum with related Tags + |/* + | :param { + | tagClass1: 'BaseballPlayer', + | tagClass2: 'ChristianBishop', + | threshold: 200 + | } + |*/ + |MATCH + | (forum:Forum)-[:HAS_MEMBER]->(person:Person) + |WITH forum, count(person) AS members + |WHERE members > $threshold + |MATCH + | (forum)-[:CONTAINER_OF]->(post1:Post)-[:HAS_TAG]-> + | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass1}) + |WITH forum, count(DISTINCT post1) AS count1 + |MATCH + | (forum)-[:CONTAINER_OF]->(post2:Post)-[:HAS_TAG]-> + | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass2}) + |WITH forum, count1, count(DISTINCT post2) AS count2 + |RETURN + | forum.id, + | count1, + | count2 + |ORDER BY + | abs(count2-count1) DESC, + | forum.id ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-10: Central Person for a Tag") { + val stages = run( + """// Q10. Central Person for a Tag + |/* + | :param { + | tag: 'John_Rhys-Davies', + | date: 20120122000000000 + | } + |*/ + |MATCH (tag:Tag {name: $tag}) + |// score + |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) + |WITH tag, collect(person) AS interestedPersons + |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) + | WHERE message.creationDate > $date + |WITH tag, interestedPersons + collect(person) AS persons + |UNWIND persons AS person + |// poor man's disjunct union (should be changed to UNION + post-union processing in the future) + |WITH DISTINCT tag, person + |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) + |WITH + | tag, + | person, + | 100 * count(interest) AS score + |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person) + | WHERE message.creationDate > $date + |WITH + | tag, + | person, + | score + count(message) AS score + |MATCH (person)-[:KNOWS]-(friend) + |WITH + | tag, + | person, + | score, + | friend + |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(friend:Person) + |WITH + | tag, + | person, + | score, + | friend, + | 100 * count(interest) AS friendScore + |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(friend) + | WHERE message.creationDate > $date + |WITH + | person, + | score, + | friend, + | friendScore + count(message) AS friendScore + |RETURN + | person.id, + | score, + | sum(friendScore) AS friendsScore + |ORDER BY + | score + friendsScore DESC, + | person.id ASC + |LIMIT 100 + """.stripMargin) + } + + ignore("bi-11: Unrelated replies") { + val stages = run( + """// Q11. Unrelated replies + |/* + | :param { + | country: 'Germany', + | blacklist: ['also', 'Pope', 'that', 'James', 'Henry', 'one', 'Green'] + | } + |*/ + |WITH $blacklist AS blacklist + |MATCH + | (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + | (person:Person)<-[:HAS_CREATOR]-(reply:Comment)-[:REPLY_OF]->(message:Message), + | (reply)-[:HAS_TAG]->(tag:Tag) + |WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) + | AND size([word IN blacklist WHERE reply.content CONTAINS word | word]) = 0 + |OPTIONAL MATCH + | (:Person)-[like:LIKES]->(reply) + |RETURN + | person.id, + | tag.name, + | count(DISTINCT like) AS countLikes, + | count(DISTINCT reply) AS countReplies + |ORDER BY + | countLikes DESC, + | person.id ASC, + | tag.name ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-12: Trending Posts") { + val stages = run( + """// Q12. Trending Posts + |/* + | :param { + | date: 20110721220000000, + | likeThreshold: 400 + | } + |*/ + |MATCH + | (message:Message)-[:HAS_CREATOR]->(creator:Person), + | (message)<-[like:LIKES]-(:Person) + |WHERE message.creationDate > $date + |WITH message, creator, count(like) AS likeCount + |WHERE likeCount > $likeThreshold + |RETURN + | message.id, + | message.creationDate, + | creator.firstName, + | creator.lastName, + | likeCount + |ORDER BY + | likeCount DESC, + | message.id ASC + |LIMIT 100 + """.stripMargin) + } + + // list comprehension + ignore("bi-13: Popular Tags per month in a country") { + val stages = run( + """// Q13. Popular Tags per month in a country + |/* + | :param { country: 'Burma' } + |*/ + |MATCH (:Country {name: $country})<-[:IS_LOCATED_IN]-(message:Message) + |OPTIONAL MATCH (message)-[:HAS_TAG]->(tag:Tag) + |WITH + | message.creationDate/10000000000000 AS year, + | message.creationDate/100000000000%100 AS month, + | message, + | tag + |WITH year, month, count(message) AS popularity, tag + |ORDER BY popularity DESC, tag.name ASC + |WITH + | year, + | month, + | collect([tag.name, popularity]) AS popularTags + |WITH + | year, + | month, + | [popularTag IN popularTags WHERE popularTag[0] IS NOT NULL] AS popularTags + |RETURN + | year, + | month, + | [i IN range(0, (CASE size(popularTags) < 5 WHEN true THEN size(popularTags) ELSE 5 END)-1) + | | popularTags[i]] AS topPopularTags + |ORDER BY + | year DESC, + | month ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-14: Top thread initiators") { + val stages = run( + """// Q14. Top thread initiators + |/* + | :param { + | startDate: 20120531220000000, + | endDate: 20120630220000000 + | } + |*/ + |MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message) + |WHERE post.creationDate >= $startDate + | AND post.creationDate <= $endDate + | AND reply.creationDate >= $startDate + | AND reply.creationDate <= $endDate + |RETURN + | person.id, + | person.firstName, + | person.lastName, + | count(DISTINCT post) AS threadCount, + | count(DISTINCT reply) AS messageCount + |ORDER BY + | messageCount DESC, + | person.id ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-15: Social normals") { + val stages = run( + """// Q15. Social normals + |/* + | :param { country: 'Burma' } + |*/ + |MATCH + | (country:Country {name: $country}) + |MATCH + | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person1:Person) + |OPTIONAL MATCH + | // start a new MATCH as friend might live in the same City + | // and thus can reuse the IS_PART_OF edge + | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), + | (person1)-[:KNOWS]-(friend1) + |WITH country, person1, count(friend1) AS friend1Count + |WITH country, floor(avg(friend1Count)) AS socialNormal + |MATCH + | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) + |MATCH + | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend2:Person), + | (person2)-[:KNOWS]-(friend2) + |WITH country, person2, count(friend2) AS friend2Count, socialNormal + |WHERE friend2Count = socialNormal + |RETURN + | person2.id, + | friend2Count AS count + |ORDER BY + | person2.id ASC + |LIMIT 100 + """.stripMargin) + } + + ignore("bi-16: Experts in social circle") { + val stages = run( + """// Q16. Experts in social circle + |/* + | :param { + | personId: 6597069777419, + | country: 'Pakistan', + | tagClass: 'MusicalArtist', + | minPathDistance: 3, + | maxPathDistance: 5 + | } + |*/ + |// This query will not work in a browser as is. I tried alternatives approaches, + |// e.g. enabling path of arbitrary lengths, saving the path to a variable p and + |// checking for `$minPathDistance <= length(p)`, but these could not be + |// evaluated due to the excessive amount of paths. + |// If you would like to test the query in the browser, replace the values of + |// $minPathDistance and $maxPathDistance to a constant. + |MATCH + | (:Person {id: $personId})-[:KNOWS*$minPathDistance..$maxPathDistance]- + | (person:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]-> + | (:Country {name: $country}), + | (person)<-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]-> + | (:TagClass {name: $tagClass}) + |MATCH (message)-[:HAS_TAG]->(tag:Tag) + |RETURN + | person.id, + | tag.name, + | count(message) AS messageCount + |ORDER BY + | messageCount DESC, + | tag.name ASC, + | person.id ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-17: Friend triangles") { + val stages = run( + """// Q17. Friend triangles + |/* + | :param { country: 'Spain' } + |*/ + |MATCH (country:Country {name: $country}) + |MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + |MATCH (b:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + |MATCH (c:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + |MATCH (a)-[:KNOWS]-(b), (b)-[:KNOWS]-(c), (c)-[:KNOWS]-(a) |WHERE a.id < b.id | AND b.id < c.id |RETURN count(*) - """.stripMargin + |// as a less elegant solution, count(a) also works + """.stripMargin) + } + + test("bi-18: How many persons have a given number of posts") { + val stages = run( + """// Q18. How many persons have a given number of posts + |/* + | :param { + | date: 20110722000000000, + | lengthThreshold: 20, + | languages: ['ar'] + | } + |*/ + |MATCH (person:Person) + |OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..]->(post:Post) + |WHERE message.content IS NOT NULL + | AND message.length < $lengthThreshold + | AND message.creationDate > $date + | AND post.language IN $languages + |WITH + | person, + | count(message) AS messageCount + |RETURN + | messageCount, + | count(person) AS personCount + |ORDER BY + | personCount DESC, + | messageCount DESC + """.stripMargin) + } + + test("bi-19: Stranger's interaction") { + val stages = run( + """// Q19. Stranger's interaction + |/* + | :param { + | date: 19890101, + | tagClass1: 'MusicalArtist', + | tagClass2: 'OfficeHolder' + | } + |*/ + |MATCH + | (:TagClass {name: $tagClass1})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- + | (forum1:Forum)-[:HAS_MEMBER]->(stranger:Person) + |WITH DISTINCT stranger + |MATCH + | (:TagClass {name: $tagClass2})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- + | (forum2:Forum)-[:HAS_MEMBER]->(stranger) + |WITH DISTINCT stranger + |MATCH + | (person:Person)<-[:HAS_CREATOR]-(:Message)-[:REPLY_OF]- + | (:Message)-[:HAS_CREATOR]->(stranger) + |WHERE person.birthday > $date + | AND person <> stranger + | AND NOT (person)-[:KNOWS]-(stranger) + |WITH person, stranger + |OPTIONAL MATCH + | (person)<-[:HAS_CREATOR]-(comment1:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(stranger) + |OPTIONAL MATCH + | (stranger)<-[:HAS_CREATOR]-(comment2:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person) + |WITH + | person, + | count(stranger) AS strangersCount, + | count(comment1) AS comment1Count, + | count(comment2) AS comment2Count + |RETURN + | person.id, + | strangersCount, + | comment1Count + comment2Count AS interactionCount + |ORDER BY + | interactionCount DESC, + | person.id ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-20: High-level topics") { + val stages = run( + """// Q20. High-level topics + |/* + | :param { tagClasses: ['Writer', 'Single', 'Country'] } + |*/ + |UNWIND $tagClasses AS tagClassName + |MATCH + | (tagClass:TagClass {name: tagClassName})<-[:IS_SUBCLASS_OF*0..]- + | (:TagClass)<-[:HAS_TYPE]-(tag:Tag)<-[:HAS_TAG]-(message:Message) + |RETURN + | tagClass.name, + | count(message) AS postCount + |ORDER BY + | postCount DESC, + | tagClass.name ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-21: Zombies in a country") { + val stages = run( + """// Q21. Zombies in a country + |/* + | :param { + | country: 'Ethiopia', + | endDate: 20130101000000000 + | } + |*/ + |MATCH (country:Country {name: $country}) + |WITH + | country, + | $endDate/10000000000000 AS endDateYear, + | $endDate/100000000000%100 AS endDateMonth + |MATCH + | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person) + |OPTIONAL MATCH + | (zombie)<-[:HAS_CREATOR]-(message:Message) + |WHERE zombie.creationDate < $endDate + | AND message.creationDate < $endDate + |WITH + | country, + | zombie, + | endDateYear, + | endDateMonth, + | zombie.creationDate/10000000000000 AS zombieCreationYear, + | zombie.creationDate/100000000000%100 AS zombieCreationMonth, + | count(message) AS messageCount + |WITH + | country, + | zombie, + | 12 * (endDateYear - zombieCreationYear ) + | + (endDateMonth - zombieCreationMonth) + | + 1 AS months, + | messageCount + |WHERE messageCount / months < 1 + |WITH + | country, + | collect(zombie) AS zombies + |UNWIND zombies AS zombie + |OPTIONAL MATCH + | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person) + |WHERE likerZombie IN zombies + |WITH + | zombie, + | count(likerZombie) AS zombieLikeCount + |OPTIONAL MATCH + | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person) + |WHERE likerPerson.creationDate < $endDate + |WITH + | zombie, + | zombieLikeCount, + | count(likerPerson) AS totalLikeCount + |RETURN + | zombie.id, + | zombieLikeCount, + | totalLikeCount, + | CASE totalLikeCount + | WHEN 0 THEN 0 + | ELSE zombieLikeCount / toFloat(totalLikeCount) + | END AS zombieScore + |ORDER BY + | zombieScore DESC, + | zombie.id ASC + |LIMIT 100 + """.stripMargin) + } + + ignore("bi-22: International dialog") { + val stages = run( + """// Q22. International dialog + |/* + | :param { + | country1: 'Mexico', + | country2: 'Indonesia' + | } + |*/ + |MATCH + | (country1:Country {name: $country1})<-[:IS_PART_OF]-(city1:City)<-[:IS_LOCATED_IN]-(person1:Person), + | (country2:Country {name: $country2})<-[:IS_PART_OF]-(city2:City)<-[:IS_LOCATED_IN]-(person2:Person) + |WITH person1, person2, city1, 0 AS score + |// subscore 1 + |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person2) + |WITH DISTINCT person1, person2, city1, score + (CASE c WHEN null THEN 0 ELSE 4 END) AS score + |// subscore 2 + |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]->(person2) + |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score + |// subscore 3 + |OPTIONAL MATCH (person1)-[k:KNOWS]-(person2) + |WITH DISTINCT person1, person2, city1, score + (CASE k WHEN null THEN 0 ELSE 15 END) AS score + |// subscore 4 + |OPTIONAL MATCH (person1)-[:LIKES]->(m:Message)-[:HAS_CREATOR]->(person2) + |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 10 END) AS score + |// subscore 5 + |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:LIKES]-(person2) + |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score + |// preorder + |ORDER BY + | city1.name ASC, + | score DESC, + | person1.id ASC, + | person2.id ASC + |WITH + | city1, + | // using a list might be faster, but the browser query editor does not like it + | collect({score: score, person1: person1, person2: person2})[0] AS top + |RETURN + | top.person1.id, + | top.person2.id, + | city1.name, + | top.score + |ORDER BY + | top.score DESC, + | top.person1.id ASC, + | top.person2.id ASC + """.stripMargin) + } + + test("bi-23: Holiday destinations") { + val stages = run( + """// Q23. Holiday destinations + |/* + | :param { country: 'Egypt' } + |*/ + |MATCH + | (home:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + | (:Person)<-[:HAS_CREATOR]-(message:Message)-[:IS_LOCATED_IN]->(destination:Country) + |WHERE home <> destination + |WITH + | message, + | destination, + | message.creationDate/100000000000%100 AS month + |RETURN + | count(message) AS messageCount, + | destination.name, + | month + |ORDER BY + | messageCount DESC, + | destination.name ASC, + | month ASC + |LIMIT 100 + """.stripMargin) + } + + test("bi-24: Messages by Topic and Continent") { + val stages = run( + """// Q24. Messages by Topic and Continent + |/* + | :param { tagClass: 'Single' } + |*/ + |MATCH (:TagClass {name: $tagClass})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]-(message:Message) + |WITH DISTINCT message + |MATCH (message)-[:IS_LOCATED_IN]->(:Country)-[:IS_PART_OF]->(continent:Continent) + |OPTIONAL MATCH (message)<-[like:LIKES]-(:Person) + |WITH + | message, + | message.creationDate/10000000000000 AS year, + | message.creationDate/100000000000%100 AS month, + | like, + | continent + |RETURN + | count(DISTINCT message) AS messageCount, + | count(like) AS likeCount, + | year, + | month, + | continent.name + |ORDER BY + | year ASC, + | month ASC, + | continent.name DESC + |LIMIT 100 + """.stripMargin) + } - val readAdapter = new IngraphIncrementalAdapter(readQuery, "read", indexer) - val result = readAdapter.result() + ignore("bi-25: Weighted paths") { + val stages = run( + """// Q25. Weighted paths + |/* + | :param { + | person1Id: 2199023264119, + | person2Id: 8796093028894, + | startDate: 20100601040000000, + | endDate: 20100701040000000 + | } + |*/ + |MATCH path = (p1:Person {id: $person1Id})-[:KNOWS*]-(p2:Person {id: $person2Id}) + |WITH p1, p2, path + |ORDER BY length(path) + |WITH p1, p2, collect(path)[0] AS path // select the shortest path + |UNWIND relationships(path) AS k + |WITH + | path, + | startNode(k) AS pA, + | endNode(k) AS pB, + | 0 AS weight + |// A to B + |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 + |OPTIONAL MATCH + | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pB), + | (m)<-[:CONTAINER_OF]-(forum:Forum) + |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate + |WITH path, pA, pB, weight + count(c)*1.0 AS weight + |// A to B + |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 + |OPTIONAL MATCH + | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pB), + | (m)<-[:CONTAINER_OF]-(forum:Forum) + |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate + |WITH path, pA, pB, weight + count(c)*0.5 AS weight + |// B to A + |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 + |OPTIONAL MATCH + | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pA), + | (m)<-[:CONTAINER_OF]-(forum:Forum) + |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate + |WITH path, pA, pB, weight + count(c)*1.0 AS weight + |// B to A + |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 + |OPTIONAL MATCH + | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pA), + | (m)<-[:CONTAINER_OF]-(forum:Forum) + |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate + |WITH path, pA, pB, weight + count(c)*0.5 AS weight + |RETURN + | [person IN nodes(path) | person.id] + |ORDER BY + | weight DESC + """.stripMargin) } } diff --git a/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala new file mode 100644 index 000000000..32ead3e0a --- /dev/null +++ b/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala @@ -0,0 +1,13 @@ +package ingraph.ire + +import org.scalatest.FunSuite + +class EngineTest extends FunSuite { + + def run(readQuery: String): Unit = { + val indexer = new Indexer() + val readAdapter = new IngraphIncrementalAdapter(readQuery, "read", indexer) + val result = readAdapter.result() + } + +} From 57547f5feee782e20951c72432e04311aa75c057 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Wed, 31 Jan 2018 22:35:25 +0100 Subject: [PATCH 02/11] Add basic 101 test --- ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala index abf37ea13..f8b3854e9 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala @@ -2,6 +2,13 @@ package ingraph.ire class BiEngineTest extends EngineTest { + test("bi-101: Count persons") { + val stages = run( + """MATCH (p:Person)-[:IS_LOCATED_IN]->(c:country) + |RETURN p, count(c) AS cc + """.stripMargin) + } + test("bi-01: Posting summary") { val stages = run( """// Q1. Posting summary From d9e2c46e64b92073ee876ac39d70e100bf9eccb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=CC=81nos=20Maginecz?= Date: Sun, 4 Feb 2018 19:30:34 +0100 Subject: [PATCH 03/11] Move function parsing to compile time --- .../expressionparser/ExpressionParser.scala | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala index 627b0696e..19738ad06 100644 --- a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala +++ b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala @@ -1,16 +1,18 @@ package ingraph.parse import hu.bme.mit.ire.datatypes.Tuple +import hu.bme.mit.ire.nodes.unary.aggregation.{StatefulAggregate, StatefulCollect} import hu.bme.mit.ire.util.GenericMath import ingraph.expressionparser.FunctionLookup -import ingraph.model.expr.{FunctionInvocation, TupleIndexLiteralAttribute} +import ingraph.model.expr.{FunctionInvocation, Parameter, TupleIndexLiteralAttribute} +import ingraph.model.misc.FunctionCategory import org.apache.spark.sql.catalyst.expressions.{Add, And, BinaryArithmetic, BinaryComparison, BinaryOperator, CaseWhen, Divide, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Multiply, Not, Or, Pmod, Remainder, Subtract} import org.apache.spark.unsafe.types.UTF8String object ExpressionParser { def apply[T](exp: Expression): Tuple => T = { - val e = parse(exp) + val e: Tuple => Any = parse(exp) t => e(t).asInstanceOf[T] } @@ -64,10 +66,18 @@ object ExpressionParser { case invoc: FunctionInvocation => val children: Seq[Tuple => Any] = invoc.children.map(parse) children.length match { - case 0 => tuple => FunctionLookup.fun0(invoc.functor)() - case 1 => tuple => FunctionLookup.fun1(invoc.functor)(children.head(tuple)) - case 2 => tuple => FunctionLookup.fun2(invoc.functor)(children(0)(tuple), children(1)(tuple)) - case 3 => tuple => FunctionLookup.fun3(invoc.functor)(children(0)(tuple), children(1)(tuple), children(2)(tuple)) + case 0 => + val fun = FunctionLookup.fun0(invoc.functor) + tuple => fun() + case 1 => + val fun = FunctionLookup.fun1(invoc.functor) + tuple => fun(children.head(tuple)) + case 2 => + val fun = FunctionLookup.fun2(invoc.functor) + tuple => (children(0)(tuple), children(1)(tuple)) + case 3 => + val fun = FunctionLookup.fun3(invoc.functor) + tuple => fun (children(0)(tuple), children(1)(tuple), children(2)(tuple)) } case exp: CaseWhen => println(exp) From 7ebbd26e222ee6279c4d00c724c9f907e0375fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=CC=81nos=20Maginecz?= Date: Sun, 4 Feb 2018 21:24:56 +0100 Subject: [PATCH 04/11] Restore aggregations in enginefactory --- .../expressionparser/ExpressionParser.scala | 39 ++++++++----------- .../scala/ingraph/ire/EngineFactory.scala | 32 +++++++-------- .../TrainbenchmarkBatchIntegrationTest.scala | 6 +++ .../unary/aggregation/AggregationNode.scala | 14 +++---- .../ire/nodes/unary/AggregationNodeTest.scala | 12 +++--- .../bme/mit/ire/nodes/unary/MaxNodeTest.scala | 2 +- .../bme/mit/ire/nodes/unary/MinNodeTest.scala | 2 +- 7 files changed, 54 insertions(+), 53 deletions(-) diff --git a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala index 19738ad06..9cc0465c0 100644 --- a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala +++ b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala @@ -1,7 +1,7 @@ package ingraph.parse import hu.bme.mit.ire.datatypes.Tuple -import hu.bme.mit.ire.nodes.unary.aggregation.{StatefulAggregate, StatefulCollect} +import hu.bme.mit.ire.nodes.unary.aggregation._ import hu.bme.mit.ire.util.GenericMath import ingraph.expressionparser.FunctionLookup import ingraph.model.expr.{FunctionInvocation, Parameter, TupleIndexLiteralAttribute} @@ -91,27 +91,22 @@ object ExpressionParser { fallback(tuple) } caseFunction + case par: Parameter => tuple => "ot" } -// -// - -// def parseAggregate(exp: Expression, lookup: Map[String, Integer]): List[(String, () => StatefulAggregate)] = exp match { -// case exp: FunctionExpression if exp.getFunctor.getCategory == FunctionCategory.AGGREGATION => -// if (exp.getFunctor != COLLECT) { -// val variable = exp.getArguments.get(0).asInstanceOf[VariableExpression].getVariable -// val index = lookup(variable.fullName) -// List((exp.fullName, exp.getFunctor match { -// case AVG => () => new StatefulAverage(index) -// case COUNT => () => new NullAwareStatefulCount(index) -// case COUNT_ALL => () => new StatefulCount() -// case MAX => () => new StatefulMax(index) -// case MIN => () => new StatefulMin(index) -// case SUM => () => new StatefulSum(index) -// })) -// } else { -// val list = parseListExpression(exp.getArguments.get(0).asInstanceOf[ListExpression]) -// val indices = list.map(e => lookup(e.asInstanceOf[VariableExpression].getVariable.fullName)).map(_.toInt) -// List((exp.fullName, () => new StatefulCollect(indices))) -// } + def parseAggregate(exp: Expression): Option[(Int, () => StatefulAggregate)] = exp match { + case FunctionInvocation(functor, Seq(TupleIndexLiteralAttribute(index, _)), _) => + import ingraph.model.misc.Function._ + val factory = functor match { + case AVG => () => new StatefulAverage(index) + case COUNT => () => new NullAwareStatefulCount(index) + case COUNT_ALL => () => new StatefulCount() + case MAX => () => new StatefulMax(index) + case MIN => () => new StatefulMin(index) + case SUM => () => new StatefulSum(index) + } + Some((index, factory)) + //TODO: collect + case _ => None + } } diff --git a/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala b/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala index 85e4024f1..442147d41 100644 --- a/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala +++ b/ire-adapter/src/main/scala/ingraph/ire/EngineFactory.scala @@ -7,6 +7,7 @@ import hu.bme.mit.ire.engine.RelationalEngine import hu.bme.mit.ire.messages.{ChangeSet, ReteMessage} import hu.bme.mit.ire.nodes.binary.{AntiJoinNode, JoinNode, LeftOuterJoinNode, UnionNode} import hu.bme.mit.ire.nodes.unary._ +import hu.bme.mit.ire.nodes.unary.aggregation.{AggregationNode, StatefulAggregate} import hu.bme.mit.ire.util.BufferMultimap import hu.bme.mit.ire.util.Utils.conversions._ import ingraph.model.expr._ @@ -53,7 +54,7 @@ object EngineFactory { case op: UnaryTNode => val node: (ReteMessage) => Unit = op match { case op: Production => production -// case op: Grouping => instantiateGrouping(op, expr) + case op: Grouping => grouping(op, expr) case op: SortAndTop => sortAndTop(op, expr) case op: Selection => selection(op, expr) case op: Projection => projection(op, expr) @@ -126,21 +127,20 @@ object EngineFactory { // unary nodes -// private def instantiateGrouping(op: Grouping, expr: ForwardConnection) = { -// val variableLookup = getSchema(op.child) -// val aggregates = op.fnode.jnode.projectList.flatMap( -// e => ExpressionParser.parseAggregate(e, variableLookup) -// ) -// val functions = () => aggregates.map( -// _._2() // GOOD LUCK UNDERSTANDING THIS -// ) -// val aggregationCriteria = op.fnode.jnode.aggregationCriteria.map(e => (e, ExpressionParser.parseValue(e, variableLookup))) -// val projectionVariableLookup: Map[String, Int] = -// aggregationCriteria.zipWithIndex.map( a => a._1._1.fullName -> a._2.asInstanceOf[Integer] ).toMap ++ -// aggregates.zipWithIndex.map( az => az._1._1 -> (az._2 + op.fnode.jnode.aggregationCriteria.size())) -// val projectionExpressions = op.internalSchema.map( e => ExpressionParser.parseValue(e, projectionVariableLookup)) -// newLocal(Props(new AggregationNode(expr.child, aggregationCriteria.map(_._2), functions, projectionExpressions))) -// } + private def grouping(op: Grouping, expr: ForwardConnection) = { + val aggregates = op.projectionTuple.map(e => ExpressionParser.parseAggregate(e)) + val factories = aggregates.flatten.map(_._2()).toVector + val nonAggregates = op.projectionTuple.filter(op.aggregationCriteria.contains) + var normalIndex = 0 + var aggregateIndex = op.projectionTuple.size - op.aggregationCriteria.size - 1 + val projections = aggregates.map { + case None => normalIndex += 1; normalIndex - 1 + case Some(_) => aggregateIndex += 1; aggregateIndex - 1 + } + val aggregationMask = op.aggregationCriteria.map(e => ExpressionParser[Any](e)).toVector + + newLocal(Props(new AggregationNode(expr.child, aggregationMask, () => factories, projections.toVector))) + } private def selection(op: Selection, expr: ForwardConnection) = { newLocal(Props(new SelectionNode(expr.child, ExpressionParser[Boolean](op.condition)))) diff --git a/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala b/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala index 0a9501345..59e6a7ec7 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/TrainbenchmarkBatchIntegrationTest.scala @@ -41,4 +41,10 @@ class TrainbenchmarkBatchIntegrationTest extends FunSuite { val expected = (7 to 9).reverse.map(n => Vector(n.toLong)) assert(results == expected) } + + test("basic aggregations") { + val query = "MATCH (s: Switch) RETURN count(s)" + val results = TrainbenchmarkUtils.readModelAndGetResults(query, 1) + assert(results == List(Vector(40))) + } } diff --git a/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala b/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala index ef53a1e79..9a7741aa9 100755 --- a/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala +++ b/ire/src/main/scala/hu/bme/mit/ire/nodes/unary/aggregation/AggregationNode.scala @@ -9,15 +9,15 @@ import scala.collection.immutable.VectorBuilder import scala.collection.mutable class AggregationNode(override val next: (ReteMessage) => Unit, - mask: Vector[Tuple => Any], functions: () => Vector[StatefulAggregate], - projection: Vector[Tuple => Any]) extends UnaryNode with SingleForwarder { + mask: Vector[Tuple => Any], factories: () => Vector[StatefulAggregate], + projection: Vector[Int]) extends UnaryNode with SingleForwarder { private val keyCount = mutable.Map[Tuple, Int]().withDefault(f => 0) - private val data = mutable.Map[Tuple, Vector[StatefulAggregate]]().withDefault(f => functions()) + private val data = mutable.Map[Tuple, Vector[StatefulAggregate]]().withDefault(f => factories()) override def onChangeSet(changeSet: ChangeSet): Unit = { val oldValues = mutable.Map[Tuple, (Tuple, Int)]() for ((key, tuples) <- changeSet.positive.groupBy(t => mask.map(m => m(t)))) { - val aggregators = data.getOrElseUpdate(key, functions()) + val aggregators = data.getOrElseUpdate(key, factories()) oldValues.getOrElseUpdate(key, (aggregators.map(_.value()), keyCount(key))) for (aggregator <- aggregators) @@ -25,7 +25,7 @@ class AggregationNode(override val next: (ReteMessage) => Unit, keyCount(key) += tuples.size } for ((key, tuples) <- changeSet.negative.groupBy(t => mask.map(m => m(t)))) { - val aggregators = data.getOrElseUpdate(key, functions()) + val aggregators = data.getOrElseUpdate(key, factories()) oldValues.getOrElseUpdate(key, (aggregators.map(_.value()), keyCount(key))) for (aggregator <- aggregators) aggregator.maintainNegative(tuples) @@ -44,8 +44,8 @@ class AggregationNode(override val next: (ReteMessage) => Unit, negative += key ++ oldValues } } - val positiveBag: TupleBag = positive.result().map((t: Tuple) => projection.map(_(t))) - val negativeBag: TupleBag = negative.result().map((t: Tuple) => projection.map(_(t))) + val positiveBag: TupleBag = positive.result().map(t => projection.map(t)) + val negativeBag: TupleBag = negative.result().map(t => projection.map(t)) forward(ChangeSet( positive = positiveBag, negative = negativeBag)) diff --git a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala index c5e28db82..e4586252e 100755 --- a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala +++ b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/AggregationNodeTest.scala @@ -30,7 +30,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "count with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3, 0), - () => Vector(new StatefulCount()), functionMask(0, 1, 2)))) // sex and the city + () => Vector(new StatefulCount()), Vector(0, 1, 2)))) // sex and the city counter ! ChangeSet(positive = tupleBag(odin)) expectMsg(ChangeSet(positive = tupleBag(tuple("male", "Asgard", 1)))) counter ! ChangeSet(positive = tupleBag(thor)) @@ -54,7 +54,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "collect with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3, 0), - () => Vector(new StatefulCollect(Vector(2))), functionMask(0, 1, 2)))) // (sex, city): (weapon) + () => Vector(new StatefulCollect(Vector(2))), Vector(0, 1, 2)))) // (sex, city): (weapon) counter ! ChangeSet(positive = tupleBag(odin)) expectMsg(ChangeSet(positive = tupleBag(tuple("male", "Asgard", cypherList(Vector("Gungnir")))))) counter ! ChangeSet(positive = tupleBag(thor)) @@ -99,7 +99,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "sum with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulSum(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulSum(4)), Vector(0, 1)))) // sex, sum for height counter ! ChangeSet(positive = tupleBag(odin)) assertNextChangeSetWithTolerance(key = 1, positive = Some(1)) counter ! ChangeSet(positive = tupleBag(thor)) @@ -113,7 +113,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im "average with complex keys" in { val echoActor = system.actorOf(TestActors.echoActorProps) val counter = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulAverage(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulAverage(4)), Vector(0, 1)))) // sex, sum for height counter ! ChangeSet(positive = tupleBag(odin)) assertNextChangeSetWithTolerance(key = 1, positive = Some(1)) counter ! ChangeSet(positive = tupleBag(thor)) @@ -135,7 +135,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im val echoActor = system.actorOf(TestActors.echoActorProps) val stddev = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulStandardDeviation(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulStandardDeviation(4)), Vector(0, 1)))) // sex, sum for height stddev ! ChangeSet(positive = tupleBag(t1)) assertNextChangeSetWithTolerance(key = 1, positive = Some(0)) stddev ! ChangeSet(positive = tupleBag(t2)) @@ -165,7 +165,7 @@ class AggregationNodeTest(_system: ActorSystem) extends TestKit(_system) with Im val echoActor = system.actorOf(TestActors.echoActorProps) val stddev = system.actorOf(Props(new AggregationNode(echoActor ! _, functionMask(3), - () => Vector(new StatefulStandardDeviationSample(4)), functionMask(0, 1)))) // sex, sum for height + () => Vector(new StatefulStandardDeviationSample(4)), Vector(0, 1)))) // sex, sum for height stddev ! ChangeSet(positive = tupleBag(t1)) assertNextChangeSetWithTolerance(key = 1, positive = Some(0)) stddev ! ChangeSet(positive = tupleBag(t2)) diff --git a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala index 01e4509ca..3131a4254 100644 --- a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala +++ b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MaxNodeTest.scala @@ -23,7 +23,7 @@ class MaxNodeTest(_system: ActorSystem) extends TestKit(_system) with ImplicitSe ) val echoActor = system.actorOf(TestActors.echoActorProps) val max = system.actorOf(Props(new AggregationNode( - echoActor ! _, functionMask(0), () => Vector(new StatefulMax(1)), functionMask(0, 1)))) + echoActor ! _, functionMask(0), () => Vector(new StatefulMax(1)), Vector(0, 1)))) max ! changeSet expectMsg(ChangeSet( diff --git a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala index 5b90bcd65..a156c3526 100644 --- a/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala +++ b/ire/src/test/scala/hu/bme/mit/ire/nodes/unary/MinNodeTest.scala @@ -23,7 +23,7 @@ class MinNodeTest(_system: ActorSystem) extends TestKit(_system) with ImplicitSe ) val echoActor = system.actorOf(TestActors.echoActorProps) val min = system.actorOf(Props( - new AggregationNode(echoActor ! _, functionMask(0), () => Vector(new StatefulMin(1)), functionMask(0, 1)))) + new AggregationNode(echoActor ! _, functionMask(0), () => Vector(new StatefulMin(1)), Vector(0, 1)))) min ! changeSet expectMsg(ChangeSet( From 8192a372561e93bbf8132a2dcebd14d12c291893 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Thu, 1 Feb 2018 21:55:55 +0100 Subject: [PATCH 05/11] Update index.md --- site/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/site/index.md b/site/index.md index 9a9ea57d5..60f6a6dd4 100644 --- a/site/index.md +++ b/site/index.md @@ -54,6 +54,7 @@ Incremental openCypher queries can also be beneficial for: # Related projects * [Grapflow](http://graphflow.io/) also aims to provide "continuous subgraph queries", based their extension of the openCypher query language, openCypher++. +* [Strider](https://github.com/renxiangnan/strider) supports incremental queries on distributed RDF data. # Publications From 320b2576c4db8a86c12a7ce66e365a2d602cdf69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 1 Feb 2018 22:01:39 +0100 Subject: [PATCH 06/11] Update index.md --- site/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/index.md b/site/index.md index 60f6a6dd4..e583a9127 100644 --- a/site/index.md +++ b/site/index.md @@ -32,7 +32,7 @@ ingraph is suited for the following technical challenges: ingraph is not efficient/expressive enough for the following technical challenges: -* Queries that are evaluated only evaluated once or infrequently (i.e. batch processing, daily analysis) [our main goal for the summer of 2017 is to resolve this issue, so stay tuned] +* Queries that are evaluated only evaluated once or infrequently (i.e. batch processing, daily analysis) * Graph analytics involving PageRank, community detection, etc. Currently, the ingraph project is not mature enough for production use. Instead, it should be used in prototypes and performance experiments. From db805eb4e898e433b8e3f21a0f9266b46a658bc0 Mon Sep 17 00:00:00 2001 From: Jozsef Marton Date: Mon, 5 Feb 2018 21:52:50 +0100 Subject: [PATCH 07/11] cypher2qplan: raise exception if non top-level aggregation encountered in returnitems --- .../compiler/cypher2qplan/QPlanResolver.scala | 29 ++++++++++++------- .../scala/ingraph/compiler/exceptions.scala | 2 ++ .../ingraph/sandbox/BiCompilerTest.scala | 6 ++-- queries/ldbc-snb-bi/bi-01.cypher | 3 +- queries/ldbc-snb-bi/bi-15.cypher | 3 +- 5 files changed, 28 insertions(+), 15 deletions(-) diff --git a/compiler/src/main/scala/ingraph/compiler/cypher2qplan/QPlanResolver.scala b/compiler/src/main/scala/ingraph/compiler/cypher2qplan/QPlanResolver.scala index eaab27878..563098105 100644 --- a/compiler/src/main/scala/ingraph/compiler/cypher2qplan/QPlanResolver.scala +++ b/compiler/src/main/scala/ingraph/compiler/cypher2qplan/QPlanResolver.scala @@ -3,7 +3,7 @@ package ingraph.compiler.cypher2qplan import java.util.concurrent.atomic.AtomicLong import ingraph.compiler.cypher2qplan.util.TransformUtil -import ingraph.compiler.exceptions.{CompilerException, NameResolutionException} +import ingraph.compiler.exceptions.{CompilerException, IllegalAggregationException, NameResolutionException} import ingraph.model.expr.{ProjectionDescriptor, ResolvableName, ReturnItem} import ingraph.model.qplan.{QNode, UnaryQNode} import ingraph.model.{expr, misc, qplan} @@ -335,17 +335,24 @@ object QPlanResolver { // those having no aggregation function in top-level position will form the aggregation criteria if at least one aggregation is seen val aggregationCriteriaCandidate: ListBuffer[Expression] = ListBuffer.empty - val seenAggregate = projectList.foldLeft[Boolean](false)((b, a) => b || (a.child match { - // FIXME: UnresolvedStar is also allowed until it is resolved - case UnresolvedStar(_) => false - case e: cExpr.Expression => if (isAggregatingFunctionInvocation(e)) { - true - } else { - aggregationCriteriaCandidate += e - false + val seenAggregate = projectList.foldLeft[Boolean](false)((b, a) => b || ({ + val projectItem = a.child + // validate aggregation semantics: no aggregation function is allowed at non top-level + projectItem.children.foreach( c => c.find(isAggregatingFunctionInvocation).fold[Unit](Unit)( e => throw new IllegalAggregationException(e.toString) ) ) + + // see if this return item is an aggregation + projectItem match { + // FIXME: UnresolvedStar is also allowed until it is resolved + case UnresolvedStar(_) => false + case e: cExpr.Expression => if (isAggregatingFunctionInvocation(e)) { + true + } else { + aggregationCriteriaCandidate += e + false + } + // This should never be reached, as projectList is expr.types.TProjectList + case x => throw new CompilerException(s"Unexpected type found in return item position: ${x.getClass}") } - // This should never be reached, as projectList is expr.types.TProjectList - case x => throw new CompilerException(s"Unexpected type found in return item position: ${x.getClass}") })) if (seenAggregate) { diff --git a/compiler/src/main/scala/ingraph/compiler/exceptions.scala b/compiler/src/main/scala/ingraph/compiler/exceptions.scala index caa00b1b9..34163c714 100644 --- a/compiler/src/main/scala/ingraph/compiler/exceptions.scala +++ b/compiler/src/main/scala/ingraph/compiler/exceptions.scala @@ -9,3 +9,5 @@ class ExpandChainException(override val message: String) extends CompilerExcepti class PatternNotAllowedException(override val message: String) extends CompilerException(message) class NameResolutionException(val name: String) extends CompilerException(s"Unresolvable name ${name}.") + +class IllegalAggregationException(val info: String) extends CompilerException(s"Aggregation found at non top-level in return item: ${info}.") diff --git a/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala b/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala index aa4585eec..8171a1787 100644 --- a/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala +++ b/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala @@ -24,7 +24,8 @@ class BiCompilerTest extends CompilerTest { |*/ |MATCH (message:Message) |WHERE message.creationDate <= $date - |WITH toFloat(count(message)) AS totalMessageCount // this should be a subquery once Cypher supports it + |WITH count(message) AS totalMessageCountInt // this should be a subquery once Cypher supports it + |WITH toFloat(totalMessageCountInt) AS totalMessageCount |MATCH (message:Message) |WHERE message.creationDate <= $date | AND message.content IS NOT NULL @@ -554,7 +555,8 @@ class BiCompilerTest extends CompilerTest { | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), | (person1)-[:KNOWS]-(friend1) |WITH country, person1, count(friend1) AS friend1Count - |WITH country, floor(avg(friend1Count)) AS socialNormal + |WITH country, avg(friend1Count) AS socialNormalFloat + |WITH country, floor(socialNormalFloat) AS socialNormal |MATCH | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) |MATCH diff --git a/queries/ldbc-snb-bi/bi-01.cypher b/queries/ldbc-snb-bi/bi-01.cypher index 53546f092..3dbeedd1a 100644 --- a/queries/ldbc-snb-bi/bi-01.cypher +++ b/queries/ldbc-snb-bi/bi-01.cypher @@ -4,7 +4,8 @@ */ MATCH (message:Message) WHERE message.creationDate <= $date -WITH toFloat(count(message)) AS totalMessageCount // this should be a subquery once Cypher supports it +WITH count(message) AS totalMessageCountInt // this should be a subquery once Cypher supports it +WITH toFloat(totalMessageCountInt) AS totalMessageCount MATCH (message:Message) WHERE message.creationDate <= $date AND message.content IS NOT NULL diff --git a/queries/ldbc-snb-bi/bi-15.cypher b/queries/ldbc-snb-bi/bi-15.cypher index b8cfe84be..8a8d66f0c 100644 --- a/queries/ldbc-snb-bi/bi-15.cypher +++ b/queries/ldbc-snb-bi/bi-15.cypher @@ -12,7 +12,8 @@ OPTIONAL MATCH (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), (person1)-[:KNOWS]-(friend1) WITH country, person1, count(friend1) AS friend1Count -WITH country, floor(avg(friend1Count)) AS socialNormal +WITH country, avg(friend1Count) AS socialNormalFloat +WITH country, floor(socialNormalFloat) AS socialNormal MATCH (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) MATCH From a8991d644e4839a040b7f2dec5590377ea6ac3e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=CC=81nos=20Maginecz?= Date: Wed, 7 Feb 2018 19:19:55 +0100 Subject: [PATCH 08/11] Having fun is important --- .../scala/ingraph/expressionparser/ExpressionParser.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala index 9cc0465c0..4568f63da 100644 --- a/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala +++ b/expression-parser/src/main/scala/ingraph/expressionparser/ExpressionParser.scala @@ -74,10 +74,10 @@ object ExpressionParser { tuple => fun(children.head(tuple)) case 2 => val fun = FunctionLookup.fun2(invoc.functor) - tuple => (children(0)(tuple), children(1)(tuple)) + tuple => fun(children(0)(tuple), children(1)(tuple)) case 3 => val fun = FunctionLookup.fun3(invoc.functor) - tuple => fun (children(0)(tuple), children(1)(tuple), children(2)(tuple)) + tuple => fun(children(0)(tuple), children(1)(tuple), children(2)(tuple)) } case exp: CaseWhen => println(exp) From ba0b4cdd9cfd63df8049fa145fb504f17cea914d Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Wed, 7 Feb 2018 20:18:00 +0100 Subject: [PATCH 09/11] Remove redundancy from tests --- .../ingraph/sandbox/BiCompilerTest.scala | 908 +----------------- .../scala/ingraph/sandbox/CompilerTest.scala | 2 +- .../test/scala/ingraph/ire/BiEngineTest.scala | 884 +---------------- .../test/scala/ingraph/ire/EngineTest.scala | 10 +- 4 files changed, 78 insertions(+), 1726 deletions(-) diff --git a/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala b/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala index 8171a1787..760f8aaf2 100644 --- a/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala +++ b/compiler/src/test/scala/ingraph/sandbox/BiCompilerTest.scala @@ -12,982 +12,104 @@ class BiCompilerTest extends CompilerTest { , printFPlan = true ) - ignore("bi-01 from file: Posting summary") { + test("bi-01 from file: Posting summary") { val stages=compileFromFile("bi-01") } - test("bi-01: Posting summary") { - val stages = compile( - """// Q1. Posting summary - |/* - | :param { date: 20110721220000000 } - |*/ - |MATCH (message:Message) - |WHERE message.creationDate <= $date - |WITH count(message) AS totalMessageCountInt // this should be a subquery once Cypher supports it - |WITH toFloat(totalMessageCountInt) AS totalMessageCount - |MATCH (message:Message) - |WHERE message.creationDate <= $date - | AND message.content IS NOT NULL - |WITH - | totalMessageCount, - | message, - | message.creationDate/10000000000000 AS year - |WITH - | totalMessageCount, - | year, - | (message:Comment) AS isComment, - | CASE - | WHEN message.length < 40 THEN 0 - | WHEN message.length < 80 THEN 1 - | WHEN message.length < 160 THEN 2 - | ELSE 3 - | END AS lengthCategory, - | count(message) AS messageCount, - | floor(avg(message.length)) AS averageMessageLength, - | sum(message.length) AS sumMessageLength - |RETURN - | year, - | isComment, - | lengthCategory, - | messageCount, - | averageMessageLength, - | sumMessageLength, - | messageCount / totalMessageCount AS percentageOfMessages - |ORDER BY - | year DESC, - | isComment ASC, - | lengthCategory ASC - """.stripMargin) - } - ignore("bi-02 from file: Top tags for country, age, gender, time") { val stages=compileFromFile("bi-02") } - ignore("bi-02: Top tags for country, age, gender, time") { - val stages = compile( - """// Q2. Top tags for country, age, gender, time - |/* - | :param { - | date1: 20091231230000000, - | date2: 20101107230000000, - | country1: 'Ethiopia', - | country2: 'Belarus' - | } - |*/ - |MATCH - | (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person:Person) - | <-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(tag:Tag) - |WHERE message.creationDate >= $startDate - | AND message.creationDate <= $endDate - | AND (country.name = $country1 OR country.name = $country2) - |WITH - | country.name AS countryName, - | message.creationDate/100000000000%100 AS month, - | person.gender AS gender, - | floor((20130101 - person.birthday) / 10000 / 5.0) AS ageGroup, - | tag.name AS tagName, - | message - |WITH - | countryName, month, gender, ageGroup, tagName, count(message) AS messageCount - |WHERE messageCount > 100 - |RETURN - | countryName, - | month, - | gender, - | ageGroup, - | tagName, - | messageCount - |ORDER BY - | messageCount DESC, - | tagName ASC, - | ageGroup ASC, - | gender ASC, - | month ASC, - | countryName ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-03 from file: Tag evolution") { val stages=compileFromFile("bi-03") } - ignore("bi-03: Tag evolution") { - val stages = compile( - """// Q3. Tag evolution - |/* - | :param { - | year: 2010, - | month: 10 - | } - |*/ - |WITH - | $year AS year1, - | $month AS month1, - | $year + toInteger($month / 12.0) AS year2, - | $month % 12 + 1 AS month2 - |// year-month 1 - |MATCH (tag:Tag) - |OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag) - | WHERE message1.creationDate/10000000000000 = year1 - | AND message1.creationDate/100000000000%100 = month1 - |WITH year2, month2, tag, count(message1) AS countMonth1 - |// year-month 2 - |OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag) - | WHERE message2.creationDate/10000000000000 = year2 - | AND message2.creationDate/100000000000%100 = month2 - |WITH - | tag, - | countMonth1, - | count(message2) AS countMonth2 - |RETURN - | tag.name, - | countMonth1, - | countMonth2, - | abs(countMonth1-countMonth2) AS diff - |ORDER BY - | diff DESC, - | tag.name ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-04 from file: Popular topics in a country") { + test("bi-04 from file: Popular topics in a country") { val stages=compileFromFile("bi-04") } - test("bi-04: Popular topics in a country") { - val stages = compile( - """// Q4. Popular topics in a country - |/* - | :param { - | tagClass: 'MusicalArtist', - | country: 'Burma' - | } - |*/ - |MATCH - | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]-> - | (post:Post)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass}) - |RETURN - | forum.id, - | forum.title, - | forum.creationDate, - | person.id, - | count(DISTINCT post) AS postCount - |ORDER BY - | postCount DESC, - | forum.id ASC - |LIMIT 20 """.stripMargin) - } - ignore("bi-05 from file: Top posters in a country") { val stages=compileFromFile("bi-05") } - ignore("bi-05: Top posters in a country") { - val stages = compile( - """// Q5. Top posters in a country - |/* - | :param { country: 'Belarus' } - |*/ - |MATCH - | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_MEMBER]-(forum:Forum) - |WITH forum, count(person) AS numberOfMembers - |ORDER BY numberOfMembers DESC, forum.id ASC - |LIMIT 100 - |WITH collect(forum) AS popularForums - |UNWIND popularForums AS forum - |MATCH - | (forum)-[:HAS_MEMBER]->(person:Person) - |OPTIONAL MATCH - | (person)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(popularForum:Forum) - |WHERE popularForum IN popularForums - |RETURN - | person.id, - | person.firstName, - | person.lastName, - | person.creationDate, - | count(DISTINCT post) AS postCount - |ORDER BY - | postCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-06 from file: Most active Posters of a given Topic") { + test("bi-06 from file: Most active Posters of a given Topic") { val stages=compileFromFile("bi-06") } - test("bi-06: Most active Posters of a given Topic") { - val stages = compile( - """// Q6. Most active Posters of a given Topic - |/* - | :param { tag: 'Abbas_I_of_Persia' } - |*/ - |MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) - |OPTIONAL MATCH (:Person)-[like:LIKES]->(message) - |OPTIONAL MATCH (message)<-[:REPLY_OF]-(comment:Comment) - |WITH person, count(DISTINCT like) AS likeCount, count(DISTINCT comment) AS replyCount, count(DISTINCT message) AS messageCount - |RETURN - | person.id, - | messageCount, - | replyCount, - | likeCount, - | 1*messageCount + 2*replyCount + 10*likeCount AS score - |ORDER BY - | score DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-07 from file: Most authoritative users on a given topic") { + test("bi-07 from file: Most authoritative users on a given topic") { val stages=compileFromFile("bi-07") } - test("bi-07: Most authoritative users on a given topic") { - val stages = compile( - """// Q7. Most authoritative users on a given topic - |/* - | :param { tag: 'Arnold_Schwarzenegger' } - |*/ - |MATCH (tag:Tag {name: $tag}) - |MATCH (tag)<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person) - |MATCH (tag)<-[:HAS_TAG]-(message2:Message)-[:HAS_CREATOR]->(person1) - |OPTIONAL MATCH (message2)<-[:LIKES]-(person2:Person) - |OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message3:Message)<-[like:LIKES]-(p3:Person) - |RETURN - | person1.id, - | count(DISTINCT like) AS authorityScore - |ORDER BY - | authorityScore DESC, - | person1.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-08 from file: Related Topics") { + test("bi-08 from file: Related Topics") { val stages=compileFromFile("bi-08") } - test("bi-08: Related Topics") { - val stages = compile( - """// Q8. Related Topics - |/* - | :param { tag: 'Genghis_Khan' } - |*/ - |MATCH - | (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message), - | (message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag) - | // there is no need to filter for relatedTag.name != $tag, as the edge uniqueness constraint takes care of that - |WHERE NOT (comment)-[:HAS_TAG]->(tag) - |RETURN - | relatedTag.name, - | count(DISTINCT comment) AS count - |ORDER BY - | count DESC, - | relatedTag.name ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-09 from file: Forum with related Tags") { + test("bi-09 from file: Forum with related Tags") { val stages=compileFromFile("bi-09") } - test("bi-09: Forum with related Tags") { - val stages = compile( - """// Q9. Forum with related Tags - |/* - | :param { - | tagClass1: 'BaseballPlayer', - | tagClass2: 'ChristianBishop', - | threshold: 200 - | } - |*/ - |MATCH - | (forum:Forum)-[:HAS_MEMBER]->(person:Person) - |WITH forum, count(person) AS members - |WHERE members > $threshold - |MATCH - | (forum)-[:CONTAINER_OF]->(post1:Post)-[:HAS_TAG]-> - | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass1}) - |WITH forum, count(DISTINCT post1) AS count1 - |MATCH - | (forum)-[:CONTAINER_OF]->(post2:Post)-[:HAS_TAG]-> - | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass2}) - |WITH forum, count1, count(DISTINCT post2) AS count2 - |RETURN - | forum.id, - | count1, - | count2 - |ORDER BY - | abs(count2-count1) DESC, - | forum.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-10 from file: Central Person for a Tag") { val stages=compileFromFile("bi-10") } - ignore("bi-10: Central Person for a Tag") { - val stages = compile( - """// Q10. Central Person for a Tag - |/* - | :param { - | tag: 'John_Rhys-Davies', - | date: 20120122000000000 - | } - |*/ - |MATCH (tag:Tag {name: $tag}) - |// score - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) - |WITH tag, collect(person) AS interestedPersons - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) - | WHERE message.creationDate > $date - |WITH tag, interestedPersons + collect(person) AS persons - |UNWIND persons AS person - |// poor man's disjunct union (should be changed to UNION + post-union processing in the future) - |WITH DISTINCT tag, person - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) - |WITH - | tag, - | person, - | 100 * count(interest) AS score - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person) - | WHERE message.creationDate > $date - |WITH - | tag, - | person, - | score + count(message) AS score - |MATCH (person)-[:KNOWS]-(friend) - |WITH - | tag, - | person, - | score, - | friend - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(friend:Person) - |WITH - | tag, - | person, - | score, - | friend, - | 100 * count(interest) AS friendScore - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(friend) - | WHERE message.creationDate > $date - |WITH - | person, - | score, - | friend, - | friendScore + count(message) AS friendScore - |RETURN - | person.id, - | score, - | sum(friendScore) AS friendsScore - |ORDER BY - | score + friendsScore DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-11 from file: Unrelated replies") { val stages=compileFromFile("bi-11") } - ignore("bi-11: Unrelated replies") { - val stages = compile( - """// Q11. Unrelated replies - |/* - | :param { - | country: 'Germany', - | blacklist: ['also', 'Pope', 'that', 'James', 'Henry', 'one', 'Green'] - | } - |*/ - |WITH $blacklist AS blacklist - |MATCH - | (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_CREATOR]-(reply:Comment)-[:REPLY_OF]->(message:Message), - | (reply)-[:HAS_TAG]->(tag:Tag) - |WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) - | AND size([word IN blacklist WHERE reply.content CONTAINS word | word]) = 0 - |OPTIONAL MATCH - | (:Person)-[like:LIKES]->(reply) - |RETURN - | person.id, - | tag.name, - | count(DISTINCT like) AS countLikes, - | count(DISTINCT reply) AS countReplies - |ORDER BY - | countLikes DESC, - | person.id ASC, - | tag.name ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-12 from file: Trending Posts") { + test("bi-12 from file: Trending Posts") { val stages=compileFromFile("bi-12") } - test("bi-12: Trending Posts") { - val stages = compile( - """// Q12. Trending Posts - |/* - | :param { - | date: 20110721220000000, - | likeThreshold: 400 - | } - |*/ - |MATCH - | (message:Message)-[:HAS_CREATOR]->(creator:Person), - | (message)<-[like:LIKES]-(:Person) - |WHERE message.creationDate > $date - |WITH message, creator, count(like) AS likeCount - |WHERE likeCount > $likeThreshold - |RETURN - | message.id, - | message.creationDate, - | creator.firstName, - | creator.lastName, - | likeCount - |ORDER BY - | likeCount DESC, - | message.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-13 from file: Popular Tags per month in a country") { val stages=compileFromFile("bi-13") } - ignore("bi-13: Popular Tags per month in a country") { - val stages = compile( - """// Q13. Popular Tags per month in a country - |/* - | :param { country: 'Burma' } - |*/ - |MATCH (:Country {name: $country})<-[:IS_LOCATED_IN]-(message:Message) - |OPTIONAL MATCH (message)-[:HAS_TAG]->(tag:Tag) - |WITH - | message.creationDate/10000000000000 AS year, - | message.creationDate/100000000000%100 AS month, - | message, - | tag - |WITH year, month, count(message) AS popularity, tag - |ORDER BY popularity DESC, tag.name ASC - |WITH - | year, - | month, - | collect([tag.name, popularity]) AS popularTags - |WITH - | year, - | month, - | [popularTag IN popularTags WHERE popularTag[0] IS NOT NULL] AS popularTags - |RETURN - | year, - | month, - | [i IN range(0, (CASE size(popularTags) < 5 WHEN true THEN size(popularTags) ELSE 5 END)-1) - | | popularTags[i]] AS topPopularTags - |ORDER BY - | year DESC, - | month ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-14 from file: Top thread initiators") { + test("bi-14 from file: Top thread initiators") { val stages=compileFromFile("bi-14") } - test("bi-14: Top thread initiators") { - val stages = compile( - """// Q14. Top thread initiators - |/* - | :param { - | startDate: 20120531220000000, - | endDate: 20120630220000000 - | } - |*/ - |MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message) - |WHERE post.creationDate >= $startDate - | AND post.creationDate <= $endDate - | AND reply.creationDate >= $startDate - | AND reply.creationDate <= $endDate - |RETURN - | person.id, - | person.firstName, - | person.lastName, - | count(DISTINCT post) AS threadCount, - | count(DISTINCT reply) AS messageCount - |ORDER BY - | messageCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-15 from file: Social normals") { + test("bi-15 from file: Social normals") { val stages=compileFromFile("bi-15") } - test("bi-15: Social normals") { - val stages = compile( - """// Q15. Social normals - |/* - | :param { country: 'Burma' } - |*/ - |MATCH - | (country:Country {name: $country}) - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person1:Person) - |OPTIONAL MATCH - | // start a new MATCH as friend might live in the same City - | // and thus can reuse the IS_PART_OF edge - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), - | (person1)-[:KNOWS]-(friend1) - |WITH country, person1, count(friend1) AS friend1Count - |WITH country, avg(friend1Count) AS socialNormalFloat - |WITH country, floor(socialNormalFloat) AS socialNormal - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend2:Person), - | (person2)-[:KNOWS]-(friend2) - |WITH country, person2, count(friend2) AS friend2Count, socialNormal - |WHERE friend2Count = socialNormal - |RETURN - | person2.id, - | friend2Count AS count - |ORDER BY - | person2.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-16 from file: Experts in social circle") { val stages=compileFromFile("bi-16") } - ignore("bi-16: Experts in social circle") { - val stages = compile( - """// Q16. Experts in social circle - |/* - | :param { - | personId: 6597069777419, - | country: 'Pakistan', - | tagClass: 'MusicalArtist', - | minPathDistance: 3, - | maxPathDistance: 5 - | } - |*/ - |// This query will not work in a browser as is. I tried alternatives approaches, - |// e.g. enabling path of arbitrary lengths, saving the path to a variable p and - |// checking for `$minPathDistance <= length(p)`, but these could not be - |// evaluated due to the excessive amount of paths. - |// If you would like to test the query in the browser, replace the values of - |// $minPathDistance and $maxPathDistance to a constant. - |MATCH - | (:Person {id: $personId})-[:KNOWS*$minPathDistance..$maxPathDistance]- - | (person:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]-> - | (:Country {name: $country}), - | (person)<-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]-> - | (:TagClass {name: $tagClass}) - |MATCH (message)-[:HAS_TAG]->(tag:Tag) - |RETURN - | person.id, - | tag.name, - | count(message) AS messageCount - |ORDER BY - | messageCount DESC, - | tag.name ASC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-17 from file: Friend triangles") { + test("bi-17 from file: Friend triangles") { val stages=compileFromFile("bi-17") } - test("bi-17: Friend triangles") { - val stages = compile( - """// Q17. Friend triangles - |/* - | :param { country: 'Spain' } - |*/ - |MATCH (country:Country {name: $country}) - |MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (b:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (c:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (a)-[:KNOWS]-(b), (b)-[:KNOWS]-(c), (c)-[:KNOWS]-(a) - |WHERE a.id < b.id - | AND b.id < c.id - |RETURN count(*) - |// as a less elegant solution, count(a) also works - """.stripMargin) - } - - ignore("bi-18 from file: How many persons have a given number of posts") { + test("bi-18 from file: How many persons have a given number of posts") { val stages=compileFromFile("bi-18") } - test("bi-18: How many persons have a given number of posts") { - val stages = compile( - """// Q18. How many persons have a given number of posts - |/* - | :param { - | date: 20110722000000000, - | lengthThreshold: 20, - | languages: ['ar'] - | } - |*/ - |MATCH (person:Person) - |OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..]->(post:Post) - |WHERE message.content IS NOT NULL - | AND message.length < $lengthThreshold - | AND message.creationDate > $date - | AND post.language IN $languages - |WITH - | person, - | count(message) AS messageCount - |RETURN - | messageCount, - | count(person) AS personCount - |ORDER BY - | personCount DESC, - | messageCount DESC - """.stripMargin) - } - - ignore("bi-19 from file: Stranger's interaction") { + test("bi-19 from file: Stranger's interaction") { val stages=compileFromFile("bi-19") } - test("bi-19: Stranger's interaction") { - val stages = compile( - """// Q19. Stranger's interaction - |/* - | :param { - | date: 19890101, - | tagClass1: 'MusicalArtist', - | tagClass2: 'OfficeHolder' - | } - |*/ - |MATCH - | (:TagClass {name: $tagClass1})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- - | (forum1:Forum)-[:HAS_MEMBER]->(stranger:Person) - |WITH DISTINCT stranger - |MATCH - | (:TagClass {name: $tagClass2})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- - | (forum2:Forum)-[:HAS_MEMBER]->(stranger) - |WITH DISTINCT stranger - |MATCH - | (person:Person)<-[:HAS_CREATOR]-(:Message)-[:REPLY_OF]- - | (:Message)-[:HAS_CREATOR]->(stranger) - |WHERE person.birthday > $date - | AND person <> stranger - | AND NOT (person)-[:KNOWS]-(stranger) - |WITH person, stranger - |OPTIONAL MATCH - | (person)<-[:HAS_CREATOR]-(comment1:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(stranger) - |OPTIONAL MATCH - | (stranger)<-[:HAS_CREATOR]-(comment2:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person) - |WITH - | person, - | count(stranger) AS strangersCount, - | count(comment1) AS comment1Count, - | count(comment2) AS comment2Count - |RETURN - | person.id, - | strangersCount, - | comment1Count + comment2Count AS interactionCount - |ORDER BY - | interactionCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-20 from file: High-level topics") { + test("bi-20 from file: High-level topics") { val stages=compileFromFile("bi-20") } - test("bi-20: High-level topics") { - val stages = compile( - """// Q20. High-level topics - |/* - | :param { tagClasses: ['Writer', 'Single', 'Country'] } - |*/ - |UNWIND $tagClasses AS tagClassName - |MATCH - | (tagClass:TagClass {name: tagClassName})<-[:IS_SUBCLASS_OF*0..]- - | (:TagClass)<-[:HAS_TYPE]-(tag:Tag)<-[:HAS_TAG]-(message:Message) - |RETURN - | tagClass.name, - | count(message) AS postCount - |ORDER BY - | postCount DESC, - | tagClass.name ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-21 from file: Zombies in a country") { val stages=compileFromFile("bi-21") } - ignore("bi-21: Zombies in a country") { - val stages = compile( - """// Q21. Zombies in a country - |/* - | :param { - | country: 'Ethiopia', - | endDate: 20130101000000000 - | } - |*/ - |MATCH (country:Country {name: $country}) - |WITH - | country, - | $endDate/10000000000000 AS endDateYear, - | $endDate/100000000000%100 AS endDateMonth - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person) - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message) - |WHERE zombie.creationDate < $endDate - | AND message.creationDate < $endDate - |WITH - | country, - | zombie, - | endDateYear, - | endDateMonth, - | zombie.creationDate/10000000000000 AS zombieCreationYear, - | zombie.creationDate/100000000000%100 AS zombieCreationMonth, - | count(message) AS messageCount - |WITH - | country, - | zombie, - | 12 * (endDateYear - zombieCreationYear ) - | + (endDateMonth - zombieCreationMonth) - | + 1 AS months, - | messageCount - |WHERE messageCount / months < 1 - |WITH - | country, - | collect(zombie) AS zombies - |UNWIND zombies AS zombie - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person) - |WHERE likerZombie IN zombies - |WITH - | zombie, - | count(likerZombie) AS zombieLikeCount - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person) - |WHERE likerPerson.creationDate < $endDate - |WITH - | zombie, - | zombieLikeCount, - | count(likerPerson) AS totalLikeCount - |RETURN - | zombie.id, - | zombieLikeCount, - | totalLikeCount, - | CASE totalLikeCount - | WHEN 0 THEN 0 - | ELSE zombieLikeCount / toFloat(totalLikeCount) - | END AS zombieScore - |ORDER BY - | zombieScore DESC, - | zombie.id ASC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-22 from file: International dialog") { val stages=compileFromFile("bi-22") } - ignore("bi-22: International dialog") { - val stages = compile( - """// Q22. International dialog - |/* - | :param { - | country1: 'Mexico', - | country2: 'Indonesia' - | } - |*/ - |MATCH - | (country1:Country {name: $country1})<-[:IS_PART_OF]-(city1:City)<-[:IS_LOCATED_IN]-(person1:Person), - | (country2:Country {name: $country2})<-[:IS_PART_OF]-(city2:City)<-[:IS_LOCATED_IN]-(person2:Person) - |WITH person1, person2, city1, 0 AS score - |// subscore 1 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE c WHEN null THEN 0 ELSE 4 END) AS score - |// subscore 2 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score - |// subscore 3 - |OPTIONAL MATCH (person1)-[k:KNOWS]-(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE k WHEN null THEN 0 ELSE 15 END) AS score - |// subscore 4 - |OPTIONAL MATCH (person1)-[:LIKES]->(m:Message)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 10 END) AS score - |// subscore 5 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:LIKES]-(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score - |// preorder - |ORDER BY - | city1.name ASC, - | score DESC, - | person1.id ASC, - | person2.id ASC - |WITH - | city1, - | // using a list might be faster, but the browser query editor does not like it - | collect({score: score, person1: person1, person2: person2})[0] AS top - |RETURN - | top.person1.id, - | top.person2.id, - | city1.name, - | top.score - |ORDER BY - | top.score DESC, - | top.person1.id ASC, - | top.person2.id ASC - """.stripMargin) - } - - ignore("bi-23 from file: Holiday destinations") { + test("bi-23 from file: Holiday destinations") { val stages=compileFromFile("bi-23") } - test("bi-23: Holiday destinations") { - val stages = compile( - """// Q23. Holiday destinations - |/* - | :param { country: 'Egypt' } - |*/ - |MATCH - | (home:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (:Person)<-[:HAS_CREATOR]-(message:Message)-[:IS_LOCATED_IN]->(destination:Country) - |WHERE home <> destination - |WITH - | message, - | destination, - | message.creationDate/100000000000%100 AS month - |RETURN - | count(message) AS messageCount, - | destination.name, - | month - |ORDER BY - | messageCount DESC, - | destination.name ASC, - | month ASC - |LIMIT 100 - """.stripMargin) - } - - ignore("bi-24 from file: Messages by Topic and Continent") { + test("bi-24 from file: Messages by Topic and Continent") { val stages=compileFromFile("bi-24") } - test("bi-24: Messages by Topic and Continent") { - val stages = compile( - """// Q24. Messages by Topic and Continent - |/* - | :param { tagClass: 'Single' } - |*/ - |MATCH (:TagClass {name: $tagClass})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]-(message:Message) - |WITH DISTINCT message - |MATCH (message)-[:IS_LOCATED_IN]->(:Country)-[:IS_PART_OF]->(continent:Continent) - |OPTIONAL MATCH (message)<-[like:LIKES]-(:Person) - |WITH - | message, - | message.creationDate/10000000000000 AS year, - | message.creationDate/100000000000%100 AS month, - | like, - | continent - |RETURN - | count(DISTINCT message) AS messageCount, - | count(like) AS likeCount, - | year, - | month, - | continent.name - |ORDER BY - | year ASC, - | month ASC, - | continent.name DESC - |LIMIT 100 - """.stripMargin) - } - ignore("bi-25 from file: Weighted paths") { val stages=compileFromFile("bi-25") } - ignore("bi-25: Weighted paths") { - val stages = compile( - """// Q25. Weighted paths - |/* - | :param { - | person1Id: 2199023264119, - | person2Id: 8796093028894, - | startDate: 20100601040000000, - | endDate: 20100701040000000 - | } - |*/ - |MATCH path = (p1:Person {id: $person1Id})-[:KNOWS*]-(p2:Person {id: $person2Id}) - |WITH p1, p2, path - |ORDER BY length(path) - |WITH p1, p2, collect(path)[0] AS path // select the shortest path - |UNWIND relationships(path) AS k - |WITH - | path, - | startNode(k) AS pA, - | endNode(k) AS pB, - | 0 AS weight - |// A to B - |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 - |OPTIONAL MATCH - | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pB), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*1.0 AS weight - |// A to B - |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 - |OPTIONAL MATCH - | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pB), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*0.5 AS weight - |// B to A - |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 - |OPTIONAL MATCH - | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pA), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*1.0 AS weight - |// B to A - |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 - |OPTIONAL MATCH - | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pA), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*0.5 AS weight - |RETURN - | [person IN nodes(path) | person.id] - |ORDER BY - | weight DESC - """.stripMargin) - } } diff --git a/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala b/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala index 0508ca406..cdf6fed31 100644 --- a/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala +++ b/compiler/src/test/scala/ingraph/sandbox/CompilerTest.scala @@ -26,7 +26,7 @@ case class CompilerTestConfig(querySuitePath: Option[String] = None def printAny: Boolean = printQuery || printCypher || printQPlan || printJPlan || printFPlan } -class CompilerTest extends FunSuite { +abstract class CompilerTest extends FunSuite { val config = CompilerTestConfig() val separatorLength = 77 diff --git a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala index f8b3854e9..bea4c7505 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala @@ -2,889 +2,111 @@ package ingraph.ire class BiEngineTest extends EngineTest { + override val queryDir: String = "ldbc-snb-bi" + test("bi-101: Count persons") { - val stages = run( - """MATCH (p:Person)-[:IS_LOCATED_IN]->(c:country) + run("""MATCH (p:Person)-[:IS_LOCATED_IN]->(c:country) |RETURN p, count(c) AS cc """.stripMargin) } - test("bi-01: Posting summary") { - val stages = run( - """// Q1. Posting summary - |/* - | :param { date: 20110721220000000 } - |*/ - |MATCH (message:Message) - |WHERE message.creationDate <= $date - |WITH toFloat(count(message)) AS totalMessageCount // this should be a subquery once Cypher supports it - |MATCH (message:Message) - |WHERE message.creationDate <= $date - | AND message.content IS NOT NULL - |WITH - | totalMessageCount, - | message, - | message.creationDate/10000000000000 AS year - |WITH - | totalMessageCount, - | year, - | (message:Comment) AS isComment, - | CASE - | WHEN message.length < 40 THEN 0 - | WHEN message.length < 80 THEN 1 - | WHEN message.length < 160 THEN 2 - | ELSE 3 - | END AS lengthCategory, - | count(message) AS messageCount, - | floor(avg(message.length)) AS averageMessageLength, - | sum(message.length) AS sumMessageLength - |RETURN - | year, - | isComment, - | lengthCategory, - | messageCount, - | averageMessageLength, - | sumMessageLength, - | messageCount / totalMessageCount AS percentageOfMessages - |ORDER BY - | year DESC, - | isComment ASC, - | lengthCategory ASC - """.stripMargin) + test("bi-01 from file: Posting summary") { + runFromFile("bi-01") } - test("bi-02: Top tags for country, age, gender, time") { - val stages = run( - """// Q2. Top tags for country, age, gender, time - |/* - | :param { - | date1: 20091231230000000, - | date2: 20101107230000000, - | country1: 'Ethiopia', - | country2: 'Belarus' - | } - |*/ - |MATCH - | (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person:Person) - | <-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(tag:Tag) - |WHERE message.creationDate >= $startDate - | AND message.creationDate <= $endDate - | AND (country.name = $country1 OR country.name = $country2) - |WITH - | country.name AS countryName, - | message.creationDate/100000000000%100 AS month, - | person.gender AS gender, - | floor((20130101 - person.birthday) / 10000 / 5.0) AS ageGroup, - | tag.name AS tagName, - | message - |WITH - | countryName, month, gender, ageGroup, tagName, count(message) AS messageCount - |WHERE messageCount > 100 - |RETURN - | countryName, - | month, - | gender, - | ageGroup, - | tagName, - | messageCount - |ORDER BY - | messageCount DESC, - | tagName ASC, - | ageGroup ASC, - | gender ASC, - | month ASC, - | countryName ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-02 from file: Top tags for country, age, gender, time") { + runFromFile("bi-02") } - test("bi-03: Tag evolution") { - val stages = run( - """// Q3. Tag evolution - |/* - | :param { - | year: 2010, - | month: 10 - | } - |*/ - |WITH - | $year AS year1, - | $month AS month1, - | $year + toInteger($month / 12.0) AS year2, - | $month % 12 + 1 AS month2 - |// year-month 1 - |MATCH (tag:Tag) - |OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag) - | WHERE message1.creationDate/10000000000000 = year1 - | AND message1.creationDate/100000000000%100 = month1 - |WITH year2, month2, tag, count(message1) AS countMonth1 - |// year-month 2 - |OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag) - | WHERE message2.creationDate/10000000000000 = year2 - | AND message2.creationDate/100000000000%100 = month2 - |WITH - | tag, - | countMonth1, - | count(message2) AS countMonth2 - |RETURN - | tag.name, - | countMonth1, - | countMonth2, - | abs(countMonth1-countMonth2) AS diff - |ORDER BY - | diff DESC, - | tag.name ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-03 from file: Tag evolution") { + runFromFile("bi-03") } - test("bi-04: Popular topics in a country") { - val stages = run( - """// Q4. Popular topics in a country - |/* - | :param { - | tagClass: 'MusicalArtist', - | country: 'Burma' - | } - |*/ - |MATCH - | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]-> - | (post:Post)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass}) - |RETURN - | forum.id, - | forum.title, - | forum.creationDate, - | person.id, - | count(DISTINCT post) AS postCount - |ORDER BY - | postCount DESC, - | forum.id ASC - |LIMIT 20 """.stripMargin) + test("bi-04 from file: Popular topics in a country") { + runFromFile("bi-04") } - test("bi-05: Top posters in a country") { - val stages = run( - """// Q5. Top posters in a country - |/* - | :param { country: 'Belarus' } - |*/ - |MATCH - | (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_MEMBER]-(forum:Forum) - |WITH forum, count(person) AS numberOfMembers - |ORDER BY numberOfMembers DESC, forum.id ASC - |LIMIT 100 - |WITH collect(forum) AS popularForums - |UNWIND popularForums AS forum - |MATCH - | (forum)-[:HAS_MEMBER]->(person:Person) - |OPTIONAL MATCH - | (person)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(popularForum:Forum) - |WHERE popularForum IN popularForums - |RETURN - | person.id, - | person.firstName, - | person.lastName, - | person.creationDate, - | count(DISTINCT post) AS postCount - |ORDER BY - | postCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-05 from file: Top posters in a country") { + runFromFile("bi-05") } - test("bi-06: Most active Posters of a given Topic") { - val stages = run( - """// Q6. Most active Posters of a given Topic - |/* - | :param { tag: 'Abbas_I_of_Persia' } - |*/ - |MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) - |OPTIONAL MATCH (:Person)-[like:LIKES]->(message) - |OPTIONAL MATCH (message)<-[:REPLY_OF]-(comment:Comment) - |WITH person, count(DISTINCT like) AS likeCount, count(DISTINCT comment) AS replyCount, count(DISTINCT message) AS messageCount - |RETURN - | person.id, - | messageCount, - | replyCount, - | likeCount, - | 1*messageCount + 2*replyCount + 10*likeCount AS score - |ORDER BY - | score DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) + test("bi-06 from file: Most active Posters of a given Topic") { + runFromFile("bi-06") } - test("bi-07: Most authoritative users on a given topic") { - val stages = run( - """// Q7. Most authoritative users on a given topic - |/* - | :param { tag: 'Arnold_Schwarzenegger' } - |*/ - |MATCH (tag:Tag {name: $tag}) - |MATCH (tag)<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person) - |MATCH (tag)<-[:HAS_TAG]-(message2:Message)-[:HAS_CREATOR]->(person1) - |OPTIONAL MATCH (message2)<-[:LIKES]-(person2:Person) - |OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message3:Message)<-[like:LIKES]-(p3:Person) - |RETURN - | person1.id, - | count(DISTINCT like) AS authorityScore - |ORDER BY - | authorityScore DESC, - | person1.id ASC - |LIMIT 100 - """.stripMargin) + test("bi-07 from file: Most authoritative users on a given topic") { + runFromFile("bi-07") } - test("bi-08: Related Topics") { - val stages = run( - """// Q8. Related Topics - |/* - | :param { tag: 'Genghis_Khan' } - |*/ - |MATCH - | (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message), - | (message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag) - | // there is no need to filter for relatedTag.name != $tag, as the edge uniqueness constraint takes care of that - |WHERE NOT (comment)-[:HAS_TAG]->(tag) - |RETURN - | relatedTag.name, - | count(DISTINCT comment) AS count - |ORDER BY - | count DESC, - | relatedTag.name ASC - |LIMIT 100 - """.stripMargin) + test("bi-08 from file: Related Topics") { + runFromFile("bi-08") } - test("bi-09: Forum with related Tags") { - val stages = run( - """// Q9. Forum with related Tags - |/* - | :param { - | tagClass1: 'BaseballPlayer', - | tagClass2: 'ChristianBishop', - | threshold: 200 - | } - |*/ - |MATCH - | (forum:Forum)-[:HAS_MEMBER]->(person:Person) - |WITH forum, count(person) AS members - |WHERE members > $threshold - |MATCH - | (forum)-[:CONTAINER_OF]->(post1:Post)-[:HAS_TAG]-> - | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass1}) - |WITH forum, count(DISTINCT post1) AS count1 - |MATCH - | (forum)-[:CONTAINER_OF]->(post2:Post)-[:HAS_TAG]-> - | (:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass2}) - |WITH forum, count1, count(DISTINCT post2) AS count2 - |RETURN - | forum.id, - | count1, - | count2 - |ORDER BY - | abs(count2-count1) DESC, - | forum.id ASC - |LIMIT 100 - """.stripMargin) + test("bi-09 from file: Forum with related Tags") { + runFromFile("bi-09") } - test("bi-10: Central Person for a Tag") { - val stages = run( - """// Q10. Central Person for a Tag - |/* - | :param { - | tag: 'John_Rhys-Davies', - | date: 20120122000000000 - | } - |*/ - |MATCH (tag:Tag {name: $tag}) - |// score - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) - |WITH tag, collect(person) AS interestedPersons - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) - | WHERE message.creationDate > $date - |WITH tag, interestedPersons + collect(person) AS persons - |UNWIND persons AS person - |// poor man's disjunct union (should be changed to UNION + post-union processing in the future) - |WITH DISTINCT tag, person - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(person:Person) - |WITH - | tag, - | person, - | 100 * count(interest) AS score - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person) - | WHERE message.creationDate > $date - |WITH - | tag, - | person, - | score + count(message) AS score - |MATCH (person)-[:KNOWS]-(friend) - |WITH - | tag, - | person, - | score, - | friend - |OPTIONAL MATCH (tag)<-[interest:HAS_INTEREST]-(friend:Person) - |WITH - | tag, - | person, - | score, - | friend, - | 100 * count(interest) AS friendScore - |OPTIONAL MATCH (tag)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(friend) - | WHERE message.creationDate > $date - |WITH - | person, - | score, - | friend, - | friendScore + count(message) AS friendScore - |RETURN - | person.id, - | score, - | sum(friendScore) AS friendsScore - |ORDER BY - | score + friendsScore DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-10 from file: Central Person for a Tag") { + runFromFile("bi-10") } - ignore("bi-11: Unrelated replies") { - val stages = run( - """// Q11. Unrelated replies - |/* - | :param { - | country: 'Germany', - | blacklist: ['also', 'Pope', 'that', 'James', 'Henry', 'one', 'Green'] - | } - |*/ - |WITH $blacklist AS blacklist - |MATCH - | (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (person:Person)<-[:HAS_CREATOR]-(reply:Comment)-[:REPLY_OF]->(message:Message), - | (reply)-[:HAS_TAG]->(tag:Tag) - |WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) - | AND size([word IN blacklist WHERE reply.content CONTAINS word | word]) = 0 - |OPTIONAL MATCH - | (:Person)-[like:LIKES]->(reply) - |RETURN - | person.id, - | tag.name, - | count(DISTINCT like) AS countLikes, - | count(DISTINCT reply) AS countReplies - |ORDER BY - | countLikes DESC, - | person.id ASC, - | tag.name ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-11 from file: Unrelated replies") { + runFromFile("bi-11") } - test("bi-12: Trending Posts") { - val stages = run( - """// Q12. Trending Posts - |/* - | :param { - | date: 20110721220000000, - | likeThreshold: 400 - | } - |*/ - |MATCH - | (message:Message)-[:HAS_CREATOR]->(creator:Person), - | (message)<-[like:LIKES]-(:Person) - |WHERE message.creationDate > $date - |WITH message, creator, count(like) AS likeCount - |WHERE likeCount > $likeThreshold - |RETURN - | message.id, - | message.creationDate, - | creator.firstName, - | creator.lastName, - | likeCount - |ORDER BY - | likeCount DESC, - | message.id ASC - |LIMIT 100 - """.stripMargin) + test("bi-12 from file: Trending Posts") { + runFromFile("bi-12") } - // list comprehension - ignore("bi-13: Popular Tags per month in a country") { - val stages = run( - """// Q13. Popular Tags per month in a country - |/* - | :param { country: 'Burma' } - |*/ - |MATCH (:Country {name: $country})<-[:IS_LOCATED_IN]-(message:Message) - |OPTIONAL MATCH (message)-[:HAS_TAG]->(tag:Tag) - |WITH - | message.creationDate/10000000000000 AS year, - | message.creationDate/100000000000%100 AS month, - | message, - | tag - |WITH year, month, count(message) AS popularity, tag - |ORDER BY popularity DESC, tag.name ASC - |WITH - | year, - | month, - | collect([tag.name, popularity]) AS popularTags - |WITH - | year, - | month, - | [popularTag IN popularTags WHERE popularTag[0] IS NOT NULL] AS popularTags - |RETURN - | year, - | month, - | [i IN range(0, (CASE size(popularTags) < 5 WHEN true THEN size(popularTags) ELSE 5 END)-1) - | | popularTags[i]] AS topPopularTags - |ORDER BY - | year DESC, - | month ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-13 from file: Popular Tags per month in a country") { + runFromFile("bi-13") } - test("bi-14: Top thread initiators") { - val stages = run( - """// Q14. Top thread initiators - |/* - | :param { - | startDate: 20120531220000000, - | endDate: 20120630220000000 - | } - |*/ - |MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message) - |WHERE post.creationDate >= $startDate - | AND post.creationDate <= $endDate - | AND reply.creationDate >= $startDate - | AND reply.creationDate <= $endDate - |RETURN - | person.id, - | person.firstName, - | person.lastName, - | count(DISTINCT post) AS threadCount, - | count(DISTINCT reply) AS messageCount - |ORDER BY - | messageCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) + test("bi-14 from file: Top thread initiators") { + runFromFile("bi-14") } - test("bi-15: Social normals") { - val stages = run( - """// Q15. Social normals - |/* - | :param { country: 'Burma' } - |*/ - |MATCH - | (country:Country {name: $country}) - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person1:Person) - |OPTIONAL MATCH - | // start a new MATCH as friend might live in the same City - | // and thus can reuse the IS_PART_OF edge - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), - | (person1)-[:KNOWS]-(friend1) - |WITH country, person1, count(friend1) AS friend1Count - |WITH country, floor(avg(friend1Count)) AS socialNormal - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend2:Person), - | (person2)-[:KNOWS]-(friend2) - |WITH country, person2, count(friend2) AS friend2Count, socialNormal - |WHERE friend2Count = socialNormal - |RETURN - | person2.id, - | friend2Count AS count - |ORDER BY - | person2.id ASC - |LIMIT 100 - """.stripMargin) + test("bi-15 from file: Social normals") { + runFromFile("bi-15") } - ignore("bi-16: Experts in social circle") { - val stages = run( - """// Q16. Experts in social circle - |/* - | :param { - | personId: 6597069777419, - | country: 'Pakistan', - | tagClass: 'MusicalArtist', - | minPathDistance: 3, - | maxPathDistance: 5 - | } - |*/ - |// This query will not work in a browser as is. I tried alternatives approaches, - |// e.g. enabling path of arbitrary lengths, saving the path to a variable p and - |// checking for `$minPathDistance <= length(p)`, but these could not be - |// evaluated due to the excessive amount of paths. - |// If you would like to test the query in the browser, replace the values of - |// $minPathDistance and $maxPathDistance to a constant. - |MATCH - | (:Person {id: $personId})-[:KNOWS*$minPathDistance..$maxPathDistance]- - | (person:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]-> - | (:Country {name: $country}), - | (person)<-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]-> - | (:TagClass {name: $tagClass}) - |MATCH (message)-[:HAS_TAG]->(tag:Tag) - |RETURN - | person.id, - | tag.name, - | count(message) AS messageCount - |ORDER BY - | messageCount DESC, - | tag.name ASC, - | person.id ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-16 from file: Experts in social circle") { + runFromFile("bi-16") } - test("bi-17: Friend triangles") { - val stages = run( - """// Q17. Friend triangles - |/* - | :param { country: 'Spain' } - |*/ - |MATCH (country:Country {name: $country}) - |MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (b:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (c:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) - |MATCH (a)-[:KNOWS]-(b), (b)-[:KNOWS]-(c), (c)-[:KNOWS]-(a) - |WHERE a.id < b.id - | AND b.id < c.id - |RETURN count(*) - |// as a less elegant solution, count(a) also works - """.stripMargin) + test("bi-17 from file: Friend triangles") { + runFromFile("bi-17") } - test("bi-18: How many persons have a given number of posts") { - val stages = run( - """// Q18. How many persons have a given number of posts - |/* - | :param { - | date: 20110722000000000, - | lengthThreshold: 20, - | languages: ['ar'] - | } - |*/ - |MATCH (person:Person) - |OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..]->(post:Post) - |WHERE message.content IS NOT NULL - | AND message.length < $lengthThreshold - | AND message.creationDate > $date - | AND post.language IN $languages - |WITH - | person, - | count(message) AS messageCount - |RETURN - | messageCount, - | count(person) AS personCount - |ORDER BY - | personCount DESC, - | messageCount DESC - """.stripMargin) + test("bi-18 from file: How many persons have a given number of posts") { + runFromFile("bi-18") } - test("bi-19: Stranger's interaction") { - val stages = run( - """// Q19. Stranger's interaction - |/* - | :param { - | date: 19890101, - | tagClass1: 'MusicalArtist', - | tagClass2: 'OfficeHolder' - | } - |*/ - |MATCH - | (:TagClass {name: $tagClass1})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- - | (forum1:Forum)-[:HAS_MEMBER]->(stranger:Person) - |WITH DISTINCT stranger - |MATCH - | (:TagClass {name: $tagClass2})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]- - | (forum2:Forum)-[:HAS_MEMBER]->(stranger) - |WITH DISTINCT stranger - |MATCH - | (person:Person)<-[:HAS_CREATOR]-(:Message)-[:REPLY_OF]- - | (:Message)-[:HAS_CREATOR]->(stranger) - |WHERE person.birthday > $date - | AND person <> stranger - | AND NOT (person)-[:KNOWS]-(stranger) - |WITH person, stranger - |OPTIONAL MATCH - | (person)<-[:HAS_CREATOR]-(comment1:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(stranger) - |OPTIONAL MATCH - | (stranger)<-[:HAS_CREATOR]-(comment2:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person) - |WITH - | person, - | count(stranger) AS strangersCount, - | count(comment1) AS comment1Count, - | count(comment2) AS comment2Count - |RETURN - | person.id, - | strangersCount, - | comment1Count + comment2Count AS interactionCount - |ORDER BY - | interactionCount DESC, - | person.id ASC - |LIMIT 100 - """.stripMargin) + test("bi-19 from file: Stranger's interaction") { + runFromFile("bi-19") } - test("bi-20: High-level topics") { - val stages = run( - """// Q20. High-level topics - |/* - | :param { tagClasses: ['Writer', 'Single', 'Country'] } - |*/ - |UNWIND $tagClasses AS tagClassName - |MATCH - | (tagClass:TagClass {name: tagClassName})<-[:IS_SUBCLASS_OF*0..]- - | (:TagClass)<-[:HAS_TYPE]-(tag:Tag)<-[:HAS_TAG]-(message:Message) - |RETURN - | tagClass.name, - | count(message) AS postCount - |ORDER BY - | postCount DESC, - | tagClass.name ASC - |LIMIT 100 - """.stripMargin) + test("bi-20 from file: High-level topics") { + runFromFile("bi-20") } - test("bi-21: Zombies in a country") { - val stages = run( - """// Q21. Zombies in a country - |/* - | :param { - | country: 'Ethiopia', - | endDate: 20130101000000000 - | } - |*/ - |MATCH (country:Country {name: $country}) - |WITH - | country, - | $endDate/10000000000000 AS endDateYear, - | $endDate/100000000000%100 AS endDateMonth - |MATCH - | (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person) - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message) - |WHERE zombie.creationDate < $endDate - | AND message.creationDate < $endDate - |WITH - | country, - | zombie, - | endDateYear, - | endDateMonth, - | zombie.creationDate/10000000000000 AS zombieCreationYear, - | zombie.creationDate/100000000000%100 AS zombieCreationMonth, - | count(message) AS messageCount - |WITH - | country, - | zombie, - | 12 * (endDateYear - zombieCreationYear ) - | + (endDateMonth - zombieCreationMonth) - | + 1 AS months, - | messageCount - |WHERE messageCount / months < 1 - |WITH - | country, - | collect(zombie) AS zombies - |UNWIND zombies AS zombie - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person) - |WHERE likerZombie IN zombies - |WITH - | zombie, - | count(likerZombie) AS zombieLikeCount - |OPTIONAL MATCH - | (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person) - |WHERE likerPerson.creationDate < $endDate - |WITH - | zombie, - | zombieLikeCount, - | count(likerPerson) AS totalLikeCount - |RETURN - | zombie.id, - | zombieLikeCount, - | totalLikeCount, - | CASE totalLikeCount - | WHEN 0 THEN 0 - | ELSE zombieLikeCount / toFloat(totalLikeCount) - | END AS zombieScore - |ORDER BY - | zombieScore DESC, - | zombie.id ASC - |LIMIT 100 - """.stripMargin) + ignore("bi-21 from file: Zombies in a country") { + runFromFile("bi-21") } - ignore("bi-22: International dialog") { - val stages = run( - """// Q22. International dialog - |/* - | :param { - | country1: 'Mexico', - | country2: 'Indonesia' - | } - |*/ - |MATCH - | (country1:Country {name: $country1})<-[:IS_PART_OF]-(city1:City)<-[:IS_LOCATED_IN]-(person1:Person), - | (country2:Country {name: $country2})<-[:IS_PART_OF]-(city2:City)<-[:IS_LOCATED_IN]-(person2:Person) - |WITH person1, person2, city1, 0 AS score - |// subscore 1 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE c WHEN null THEN 0 ELSE 4 END) AS score - |// subscore 2 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score - |// subscore 3 - |OPTIONAL MATCH (person1)-[k:KNOWS]-(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE k WHEN null THEN 0 ELSE 15 END) AS score - |// subscore 4 - |OPTIONAL MATCH (person1)-[:LIKES]->(m:Message)-[:HAS_CREATOR]->(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 10 END) AS score - |// subscore 5 - |OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:LIKES]-(person2) - |WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score - |// preorder - |ORDER BY - | city1.name ASC, - | score DESC, - | person1.id ASC, - | person2.id ASC - |WITH - | city1, - | // using a list might be faster, but the browser query editor does not like it - | collect({score: score, person1: person1, person2: person2})[0] AS top - |RETURN - | top.person1.id, - | top.person2.id, - | city1.name, - | top.score - |ORDER BY - | top.score DESC, - | top.person1.id ASC, - | top.person2.id ASC - """.stripMargin) + ignore("bi-22 from file: International dialog") { + runFromFile("bi-22") } - test("bi-23: Holiday destinations") { - val stages = run( - """// Q23. Holiday destinations - |/* - | :param { country: 'Egypt' } - |*/ - |MATCH - | (home:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- - | (:Person)<-[:HAS_CREATOR]-(message:Message)-[:IS_LOCATED_IN]->(destination:Country) - |WHERE home <> destination - |WITH - | message, - | destination, - | message.creationDate/100000000000%100 AS month - |RETURN - | count(message) AS messageCount, - | destination.name, - | month - |ORDER BY - | messageCount DESC, - | destination.name ASC, - | month ASC - |LIMIT 100 - """.stripMargin) + test("bi-23 from file: Holiday destinations") { + runFromFile("bi-23") } - test("bi-24: Messages by Topic and Continent") { - val stages = run( - """// Q24. Messages by Topic and Continent - |/* - | :param { tagClass: 'Single' } - |*/ - |MATCH (:TagClass {name: $tagClass})<-[:HAS_TYPE]-(:Tag)<-[:HAS_TAG]-(message:Message) - |WITH DISTINCT message - |MATCH (message)-[:IS_LOCATED_IN]->(:Country)-[:IS_PART_OF]->(continent:Continent) - |OPTIONAL MATCH (message)<-[like:LIKES]-(:Person) - |WITH - | message, - | message.creationDate/10000000000000 AS year, - | message.creationDate/100000000000%100 AS month, - | like, - | continent - |RETURN - | count(DISTINCT message) AS messageCount, - | count(like) AS likeCount, - | year, - | month, - | continent.name - |ORDER BY - | year ASC, - | month ASC, - | continent.name DESC - |LIMIT 100 - """.stripMargin) + test("bi-24 from file: Messages by Topic and Continent") { + runFromFile("bi-24") } - ignore("bi-25: Weighted paths") { - val stages = run( - """// Q25. Weighted paths - |/* - | :param { - | person1Id: 2199023264119, - | person2Id: 8796093028894, - | startDate: 20100601040000000, - | endDate: 20100701040000000 - | } - |*/ - |MATCH path = (p1:Person {id: $person1Id})-[:KNOWS*]-(p2:Person {id: $person2Id}) - |WITH p1, p2, path - |ORDER BY length(path) - |WITH p1, p2, collect(path)[0] AS path // select the shortest path - |UNWIND relationships(path) AS k - |WITH - | path, - | startNode(k) AS pA, - | endNode(k) AS pB, - | 0 AS weight - |// A to B - |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 - |OPTIONAL MATCH - | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pB), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*1.0 AS weight - |// A to B - |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 - |OPTIONAL MATCH - | (pA)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pB), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*0.5 AS weight - |// B to A - |// every reply (by one of the Persons) to a Post (by the other Person): 1.0 - |OPTIONAL MATCH - | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Post)-[:HAS_CREATOR]->(pA), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*1.0 AS weight - |// B to A - |// every reply (by ones of the Persons) to a Comment (by the other Person): 0.5 - |OPTIONAL MATCH - | (pB)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(m:Comment)-[:HAS_CREATOR]->(pA), - | (m)<-[:CONTAINER_OF]-(forum:Forum) - |WHERE forum.creationDate >= $startDate AND forum.creationDate <= $endDate - |WITH path, pA, pB, weight + count(c)*0.5 AS weight - |RETURN - | [person IN nodes(path) | person.id] - |ORDER BY - | weight DESC - """.stripMargin) + ignore("bi-25 from file: Weighted paths") { + runFromFile("bi-25") } - } diff --git a/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala index 32ead3e0a..8066a0768 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/EngineTest.scala @@ -2,7 +2,9 @@ package ingraph.ire import org.scalatest.FunSuite -class EngineTest extends FunSuite { +abstract class EngineTest extends FunSuite { + + val queryDir: String def run(readQuery: String): Unit = { val indexer = new Indexer() @@ -10,4 +12,10 @@ class EngineTest extends FunSuite { val result = readAdapter.result() } + def runFromFile(fileBaseName: String): Unit = { + val source = scala.io.Source.fromFile(s"../queries/${queryDir}/${fileBaseName}.cypher") + val queryString = try source.getLines.mkString("\n") finally source.close() + run(queryString) + } + } From 7e21041e442aff10d31f2a9f4f470cbca86af80e Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Thu, 8 Feb 2018 16:14:30 +0100 Subject: [PATCH 10/11] Ignore breaking adapter tests --- .../test/scala/ingraph/ire/BiEngineTest.scala | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala index bea4c7505..797b6ba5b 100644 --- a/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala +++ b/ire-adapter/src/test/scala/ingraph/ire/BiEngineTest.scala @@ -10,7 +10,8 @@ class BiEngineTest extends EngineTest { """.stripMargin) } - test("bi-01 from file: Posting summary") { + // compiles + ignore("bi-01 from file: Posting summary") { runFromFile("bi-01") } @@ -42,7 +43,8 @@ class BiEngineTest extends EngineTest { runFromFile("bi-08") } - test("bi-09 from file: Forum with related Tags") { + // compiles + ignore("bi-09 from file: Forum with related Tags") { runFromFile("bi-09") } @@ -62,11 +64,13 @@ class BiEngineTest extends EngineTest { runFromFile("bi-13") } - test("bi-14 from file: Top thread initiators") { + // compiles + ignore("bi-14 from file: Top thread initiators") { runFromFile("bi-14") } - test("bi-15 from file: Social normals") { + // compiles + ignore("bi-15 from file: Social normals") { runFromFile("bi-15") } @@ -78,15 +82,18 @@ class BiEngineTest extends EngineTest { runFromFile("bi-17") } - test("bi-18 from file: How many persons have a given number of posts") { + // compiles + ignore("bi-18 from file: How many persons have a given number of posts") { runFromFile("bi-18") } - test("bi-19 from file: Stranger's interaction") { + // compiles + ignore("bi-19 from file: Stranger's interaction") { runFromFile("bi-19") } - test("bi-20 from file: High-level topics") { + // compiles + ignore("bi-20 from file: High-level topics") { runFromFile("bi-20") } @@ -102,7 +109,8 @@ class BiEngineTest extends EngineTest { runFromFile("bi-23") } - test("bi-24 from file: Messages by Topic and Continent") { + // compiles + ignore ("bi-24 from file: Messages by Topic and Continent") { runFromFile("bi-24") } From 727251c76237c0c93a496441e42f1f3c7f21c683 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Thu, 8 Feb 2018 16:14:47 +0100 Subject: [PATCH 11/11] Unignore some DML tests for compiler --- compiler/src/test/scala/ingraph/sandbox/DmlTest.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala b/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala index 5a70cd389..9268f0ab3 100644 --- a/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala +++ b/compiler/src/test/scala/ingraph/sandbox/DmlTest.scala @@ -29,19 +29,19 @@ class DmlTest extends FunSuite { println(plan) } - ignore("should compile simple DELETE for a vertex") { + test("should compile simple DELETE for a vertex") { val cypher = CypherParser.parseString("MATCH (n:Person) DELETE n") val plan = CypherToQPlan.build(cypher) println(plan) } - ignore("should compile simple DELETE for an edge") { + test("should compile simple DELETE for an edge") { val cypher = CypherParser.parseString("MATCH (p1:Person)-[r:KNOWS]-(p2:Person) DELETE r") val plan = CypherToQPlan.build(cypher) println(plan) } - ignore("should compile simple DELETE for an entire pattern") { + test("should compile simple DELETE for an entire pattern") { val cypher = CypherParser.parseString("MATCH (p1:Person)-[r:KNOWS]-(p2:Person) DELETE p1, r, p2") val plan = CypherToQPlan.build(cypher) println(plan)