This repository was archived by the owner on Apr 6, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph.cypher
145 lines (97 loc) · 4.94 KB
/
graph.cypher
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
////////////////// Import Nodes //////////////////
// 1️⃣ Import Vendors
CREATE INDEX vendor_id_index FOR (v:Vendor) ON (v.vendor_id);
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Nodes/vendor.csv' AS row
MERGE (v:Vendor {vendor_id: row.vendor_id});
// 2️⃣ Import Trips
CREATE INDEX trip_id_index FOR (t:Trip) ON (t.trip_id);
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Nodes/trip.csv' AS row
MERGE (t:Trip {trip_id: row.trip_id})
SET t.tpep_pickup_datetime = row.tpep_pickup_datetime,
t.tpep_dropoff_datetime = row.tpep_dropoff_datetime,
t.trip_distance = toFloat(row.trip_distance),
t.RatecodeID = row.RatecodeID,
t.store_and_fwd_flag = row.store_and_fwd_flag;
// 3️⃣ Import Passenger Groups
CREATE INDEX passenger_count_id_index FOR (p:PassengerGroup) ON (p.passenger_count_id);
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Nodes/passenger_group.csv' AS row
MERGE (p:PassengerGroup {passenger_count_id: row.passenger_count_id})
SET p.passenger_count = toInteger(row.passenger_count);
// 4️⃣ Import Locations
CREATE INDEX location_id_index FOR (l:Location) ON (l.location_id);
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Nodes/location.csv' AS row
MERGE (l:Location {location_id: row.location_id});
// 5️⃣ Import Payment Types
CREATE INDEX payment_type_id_index FOR (p:PaymentType) ON (p.payment_type_id);
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Nodes/payement_type.csv' AS row
MERGE (p:PaymentType {payment_type_id: row.payment_type_id})
SET p.payment_type_description = row.payment_type_description;
////////////////// Import Relations //////////////////
// 1️⃣ Relation between Trip and Vendor
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Relations/trip-vendor.csv' AS row
MATCH (t:Trip {trip_id: row.trip_id}), (v:Vendor {vendor_id: row.vendor_id})
MERGE (v)-[:PROVIDED]->(t);
// 2️⃣ Relation between Trip and Passenger Group
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Relations/trip-passenger_group.csv' AS row
MATCH (t:Trip {trip_id: row.trip_id}), (p:PassengerGroup {passenger_count_id: row.passenger_count_id})
MERGE (t)-[:HAS_PASSENGERS]->(p);
// 3️⃣ Relation between Trip and Start Location
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Relations/trip-location_start.csv' AS row
MATCH (t:Trip {trip_id: row.trip_id}), (l:Location {location_id: row.location_id})
MERGE (t)-[:STARTED_AT]->(l);
// 4️⃣ Relation between Trip and End Location
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Relations/trip-location_end.csv' AS row
MATCH (t:Trip {trip_id: row.trip_id}), (l:Location {location_id: row.location_id})
MERGE (t)-[:ENDED_AT]->(l);
// 5️⃣ Relation between Trip and Payment Type with cost details
LOAD CSV WITH HEADERS FROM 'file:///df_for_Neo4j/Relations/trip-payement.csv' AS row
MATCH (t:Trip {trip_id: row.trip_id}), (p:PaymentType {payment_type_id: row.payment_type_id})
MERGE (t)-[:PAID_FOR {
fare_amount: toFloat(row.fare_amount),
extra: toFloat(row.extra),
mta_tax: toFloat(row.mta_tax),
tip_amount: toFloat(row.tip_amount),
tolls_amount: toFloat(row.tolls_amount),
improvement_surcharge: toFloat(row.improvement_surcharge),
congestion_surcharge: toFloat(row.congestion_surcharge),
total_amount: toFloat(row.total_amount)
}]->(p);
////////////////// View Relations //////////////////
// 1️⃣ Average and Maximum Trip Distance
MATCH (t:Trip)
RETURN AVG(t.trip_distance) AS average_distance,
MAX(t.trip_distance) AS max_distance;
// 2️⃣ Number of Trips per Vendor
MATCH (v:Vendor)-[:PROVIDED]->(t:Trip)
RETURN v.vendor_id, COUNT(t) AS trip_count
ORDER BY trip_count DESC;
// 3️⃣ Top 5 Most Frequent Trip Routes
MATCH (t:Trip)-[:STARTED_AT]->(start:Location), (t)-[:ENDED_AT]->(end:Location)
RETURN start.location_id AS start_location, end.location_id AS end_location, COUNT(t) AS trip_count
ORDER BY trip_count DESC
LIMIT 5;
// 4️⃣ Number of Trips by Passenger Count
MATCH (t:Trip)-[:HAS_PASSENGERS]->(p:PassengerGroup)
RETURN p.passenger_count, COUNT(t) AS trip_count
ORDER BY p.passenger_count;
// 5️⃣ Total Revenue by Payment Type
MATCH (t:Trip)-[r:PAID_FOR]->(p:PaymentType)
RETURN p.payment_type_description, SUM(r.total_amount) AS total_revenue, COUNT(t) AS trip_count
ORDER BY total_revenue DESC;
// 6️⃣ Top 10 Most Frequent Departure Locations
MATCH (t:Trip)-[:STARTED_AT]->(l:Location)
RETURN l.location_id, COUNT(t) AS departure_count
ORDER BY departure_count DESC
LIMIT 10;
// 7️⃣ Top 10 Most Frequent Arrival Locations
MATCH (t:Trip)-[:ENDED_AT]->(l:Location)
RETURN l.location_id, COUNT(t) AS arrival_count
ORDER BY arrival_count DESC
LIMIT 10;
// 8️⃣ Details of Trips from Location 48 to Location 107
MATCH (t:Trip)-[:STARTED_AT]->(start:Location {location_id: '48'})
MATCH (t)-[:ENDED_AT]->(end:Location {location_id: '107'})
OPTIONAL MATCH (v:Vendor)-[:PROVIDED]->(t)
OPTIONAL MATCH (t)-[:HAS_PASSENGERS]->(p:PassengerGroup)
OPTIONAL MATCH (t)-[:PAID_FOR]->(pay:PaymentType)
RETURN t, start, end, v, p, pay;