@@ -42,8 +42,62 @@ def runHotcellAnalysis(spark: SparkSession, pointPath: String): DataFrame =
val maxZ = 31
val numCells = (maxX - minX + 1) * (maxY - minY + 1) * (maxZ - minZ + 1)

- // YOU NEED TO CHANGE THIS PART
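+ // Keep only pickups whose cell coordinates fall inside the bounded space-time cube.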
+ pickupInfo = spark.sql("select x, y, z from pickupInfoView where x >= " + minX + " and x <= " + maxX + " and y >= " + minY + " and y <= " + maxY + " and z >= " + minZ + " and z <= " + maxZ + " order by z, y, x")
+ pickupInfo.createOrReplaceTempView("selectedCellVals")
+ // pickupInfo.show()

- return pickupInfo // YOU NEED TO CHANGE THIS PART
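+ // Attribute value of each cell = the number of pickups that fall in it.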
+ pickupInfo = spark.sql("select x, y, z, count(*) as hotCells from selectedCellVals group by x, y, z order by z, y, x")
+ pickupInfo.createOrReplaceTempView("selectedCellHotness")
+ // pickupInfo.show()
+
+ val sumOfSelectedCells = spark.sql("select sum(hotCells) as sumHotCells from selectedCellHotness")
+ sumOfSelectedCells.createOrReplaceTempView("sumOfSelectedCells")
+ // sumOfSelectedCells.show()
+
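+ // Mean attribute value over all cells in the cube (empty cells count as zero).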
+ val mean = sumOfSelectedCells.first().getLong(0).toDouble / numCells.toDouble
+ // println(mean)
+
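+ // UDF to square a cell's attribute value, used for the E[X^2] term in the standard deviation below.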
+ spark.udf.register(" squared" , (inputX : Int ) => (((inputX* inputX).toDouble)))
+
+ val sumOfSquares = spark.sql("select sum(squared(hotCells)) as sumOfSquares from selectedCellHotness")
+ sumOfSquares.createOrReplaceTempView("sumOfSquares")
+ // sumOfSquares.show()
+
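+ // Population standard deviation: sqrt(E[X^2] - mean^2).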
+ val standardDeviation = scala.math.sqrt(sumOfSquares.first().getDouble(0) / numCells.toDouble - mean * mean)
+ // println(standardDeviation)
+
+ spark.udf.register(" adjacentCells" , (inputX : Int , inputY : Int , inputZ : Int , minX : Int , maxX : Int , minY : Int , maxY : Int , minZ : Int , maxZ : Int ) => ((HotcellUtils .calculateAdjacentCells(inputX, inputY, inputZ, minX, minY, minZ, maxX, maxY, maxZ))))
+
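+ // Self-join: for every cell, total the pickups over itself and its up to 26 neighbors.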
+ val adjacentCells = spark.sql("select adjacentCells(sch1.x, sch1.y, sch1.z, " + minX + ", " + maxX + ", " + minY + ", " + maxY + ", " + minZ + ", " + maxZ + ") as adjacentCellCount, " +
+ "sch1.x as x, sch1.y as y, sch1.z as z, " +
+ "sum(sch2.hotCells) as sumHotCells " +
+ "from selectedCellHotness as sch1, selectedCellHotness as sch2 " +
+ "where (sch2.x = sch1.x + 1 or sch2.x = sch1.x or sch2.x = sch1.x - 1) " +
+ "and (sch2.y = sch1.y + 1 or sch2.y = sch1.y or sch2.y = sch1.y - 1) " +
+ "and (sch2.z = sch1.z + 1 or sch2.z = sch1.z or sch2.z = sch1.z - 1) " +
+ "group by sch1.z, sch1.y, sch1.x " +
+ "order by sch1.z, sch1.y, sch1.x")
+ adjacentCells.createOrReplaceTempView("adjacentCells")
+ // adjacentCells.show()
+
+ spark.udf.register(" zScore" , (adjacentCellCount : Int , sumHotCells : Int , numCells : Int , x : Int , y : Int , z : Int , mean : Double , standardDeviation : Double ) => ((HotcellUtils .calculateZScore(adjacentCellCount, sumHotCells, numCells, x, y, z, mean, standardDeviation))))
+
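+ // Getis-Ord Gi* score for every cell, hottest first.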
+ pickupInfo = spark.sql(" select zScore(adjacentCellCount, sumHotCells, " + numCells + " , x, y, z," + mean + " , " + standardDeviation + " ) as getisOrdStatistic, x, y, z from adjacentCells order by getisOrdStatistic desc" );
+ pickupInfo.createOrReplaceTempView("zScore")
+ // pickupInfo.show()
+
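+ // Final result: cell coordinates only, already ordered by descending hotness.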
+ pickupInfo = spark.sql(" select x, y, z from zScore" )
+ pickupInfo.createOrReplaceTempView("finalPickupInfo")
+ // pickupInfo.show()
+
+ return pickupInfo
}
}
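The diff relies on two helpers in HotcellUtils that this commit does not touch. As a rough sketch only (the parameter order is copied from the UDF registrations above, but the body is an assumption, not the repository's code), calculateAdjacentCells would count how many cells of the 3x3x3 block centered on (x, y, z) survive clipping at the cube boundary: 27 in the interior, down to 8 in a corner, with the cell itself always included.

    // Hypothetical sketch of HotcellUtils.calculateAdjacentCells, not the actual repository code.
    object HotcellUtilsSketch {
      def calculateAdjacentCells(x: Int, y: Int, z: Int,
                                 minX: Int, minY: Int, minZ: Int,
                                 maxX: Int, maxY: Int, maxZ: Int): Int = {
        // Width of the neighborhood in each dimension after clipping to the cube.
        val xSpan = math.min(x + 1, maxX) - math.max(x - 1, minX) + 1
        val ySpan = math.min(y + 1, maxY) - math.max(y - 1, minY) + 1
        val zSpan = math.min(z + 1, maxZ) - math.max(z - 1, minZ) + 1
        xSpan * ySpan * zSpan
      }
    }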
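Likewise, calculateZScore presumably evaluates the Getis-Ord Gi* statistic with binary weights, where both the sum of weights and the sum of squared weights equal adjacentCellCount; the x, y, z parameters are carried through but unused in the formula. A sketch under those assumptions:

    // Hypothetical sketch of HotcellUtils.calculateZScore: Getis-Ord Gi* with binary
    // weights w_ij in {0, 1}, so sum(w) = sum(w^2) = adjacentCellCount.
    //   Gi* = (sumHotCells - mean * W) / (stdDev * sqrt((n * W - W^2) / (n - 1)))
    object ZScoreSketch {
      def calculateZScore(adjacentCellCount: Int, sumHotCells: Int, numCells: Int,
                          x: Int, y: Int, z: Int,
                          mean: Double, standardDeviation: Double): Double = {
        val w = adjacentCellCount.toDouble   // cells in the neighborhood, self included
        val n = numCells.toDouble            // all cells in the space-time cube
        val numerator = sumHotCells.toDouble - mean * w
        val denominator = standardDeviation * math.sqrt((n * w - w * w) / (n - 1.0))
        numerator / denominator              // x, y, z play no role in the statistic itself
      }
    }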