@@ -35,50 +35,28 @@ import org.apache.comet.vector.NativeUtil
35
35
* `hasNext` can be used to check if it is the end of this iterator (i.e. the native query is
36
36
* done).
37
37
*
38
+ * @param id
39
+ * The unique id of the query plan behind this native execution.
38
40
* @param inputs
39
41
* The input iterators producing sequence of batches of Arrow Arrays.
40
- * @param protobufQueryPlan
41
- * The serialized bytes of Spark execution plan.
42
42
* @param numParts
43
43
* The number of partitions.
44
44
* @param partitionIndex
45
45
* The index of the partition.
46
46
*/
47
47
class CometExecIterator (
48
48
val id : Long ,
49
+ nativePlan : Long ,
49
50
inputs : Seq [Iterator [ColumnarBatch ]],
50
51
numOutputCols : Int ,
51
- protobufQueryPlan : Array [Byte ],
52
- nativeMetrics : CometMetricNode ,
53
52
numParts : Int ,
54
53
partitionIndex : Int )
55
54
extends Iterator [ColumnarBatch ] {
55
+ import CometExecIterator ._
56
56
57
- private val nativeLib = new Native ()
58
- private val nativeUtil = new NativeUtil ()
59
57
private val cometBatchIterators = inputs.map { iterator =>
60
58
new CometBatchIterator (iterator, nativeUtil)
61
59
}.toArray
62
- private val plan = {
63
- val conf = SparkEnv .get.conf
64
- // Only enable unified memory manager when off-heap mode is enabled. Otherwise,
65
- // we'll use the built-in memory pool from DF, and initializes with `memory_limit`
66
- // and `memory_fraction` below.
67
- nativeLib.createPlan(
68
- id,
69
- cometBatchIterators,
70
- protobufQueryPlan,
71
- nativeMetrics,
72
- new CometTaskMemoryManager (id),
73
- batchSize = COMET_BATCH_SIZE .get(),
74
- use_unified_memory_manager = conf.getBoolean(" spark.memory.offHeap.enabled" , false ),
75
- memory_limit = CometSparkSessionExtensions .getCometMemoryOverhead(conf),
76
- memory_fraction = COMET_EXEC_MEMORY_FRACTION .get(),
77
- debug = COMET_DEBUG_ENABLED .get(),
78
- explain = COMET_EXPLAIN_NATIVE_ENABLED .get(),
79
- workerThreads = COMET_WORKER_THREADS .get(),
80
- blockingThreads = COMET_BLOCKING_THREADS .get())
81
- }
82
60
83
61
private var nextBatch : Option [ColumnarBatch ] = None
84
62
private var currentBatch : ColumnarBatch = null
@@ -91,7 +69,13 @@ class CometExecIterator(
91
69
numOutputCols,
92
70
(arrayAddrs, schemaAddrs) => {
93
71
val ctx = TaskContext .get()
94
- nativeLib.executePlan(ctx.stageId(), partitionIndex, plan, arrayAddrs, schemaAddrs)
72
+ nativeLib.executePlan(
73
+ ctx.stageId(),
74
+ partitionIndex,
75
+ nativePlan,
76
+ cometBatchIterators,
77
+ arrayAddrs,
78
+ schemaAddrs)
95
79
})
96
80
}
97
81
@@ -134,8 +118,6 @@ class CometExecIterator(
134
118
currentBatch.close()
135
119
currentBatch = null
136
120
}
137
- nativeUtil.close()
138
- nativeLib.releasePlan(plan)
139
121
140
122
// The allocator thoughts the exported ArrowArray and ArrowSchema structs are not released,
141
123
// so it will report:
@@ -160,3 +142,43 @@ class CometExecIterator(
160
142
}
161
143
}
162
144
}
145
/**
 * Companion object holding the JNI entry points and a process-wide cache of
 * native query plans, so that a plan shared by many partitions/tasks is only
 * created once in native code.
 */
object CometExecIterator {
  val nativeLib = new Native()
  val nativeUtil = new NativeUtil()

  // Cache of native plan handles, keyed by the serialized Spark plan.
  //
  // NOTE: the key type is Seq[Byte], not Array[Byte]. JVM arrays use identity
  // equals/hashCode, so an Array[Byte]-keyed map would never match a second
  // array instance with identical contents — deduplication would silently never
  // happen, and releasePlan called with a different array instance would leak
  // the native plan. Seq[Byte] has structural equality, so identical plan bytes
  // always hit the same entry.
  private val planMap = new java.util.concurrent.ConcurrentHashMap[Seq[Byte], Long]()

  /**
   * Returns the handle of the native plan corresponding to the given serialized
   * Spark plan, creating (and caching) it on first use.
   *
   * NOTE(review): there is no reference counting here — if two concurrent
   * executions share the same plan bytes and one calls releasePlan while the
   * other is still running, the shared native plan is freed out from under it.
   * Confirm callers serialize create/release per plan.
   *
   * @param id
   *   The unique id of the query plan behind this native execution.
   * @param protobufQueryPlan
   *   The serialized bytes of the Spark execution plan.
   * @param nativeMetrics
   *   The metrics node to which native execution metrics are reported.
   * @return
   *   An opaque handle to the native plan.
   */
  def createPlan(id: Long, protobufQueryPlan: Array[Byte], nativeMetrics: CometMetricNode): Long =
    synchronized {
      // Structural key: two distinct arrays with the same bytes map to the same entry.
      val key = protobufQueryPlan.toSeq
      if (planMap.containsKey(key)) {
        planMap.get(key)
      } else {
        val conf = SparkEnv.get.conf
        // Only enable the unified memory manager when off-heap mode is enabled.
        // Otherwise DataFusion's built-in memory pool is used, initialized with
        // `memory_limit` and `memory_fraction` below.
        val plan = nativeLib.createPlan(
          id,
          protobufQueryPlan,
          nativeMetrics,
          new CometTaskMemoryManager(id),
          batchSize = COMET_BATCH_SIZE.get(),
          use_unified_memory_manager = conf.getBoolean("spark.memory.offHeap.enabled", false),
          memory_limit = CometSparkSessionExtensions.getCometMemoryOverhead(conf),
          memory_fraction = COMET_EXEC_MEMORY_FRACTION.get(),
          debug = COMET_DEBUG_ENABLED.get(),
          explain = COMET_EXPLAIN_NATIVE_ENABLED.get(),
          workerThreads = COMET_WORKER_THREADS.get(),
          blockingThreads = COMET_BLOCKING_THREADS.get())
        planMap.put(key, plan)
        plan
      }
    }

  /**
   * Releases the cached native plan created for the given serialized Spark
   * plan, if one exists; a no-op otherwise.
   *
   * @param protobufQueryPlan
   *   The serialized bytes of the Spark execution plan whose native plan should
   *   be released.
   */
  def releasePlan(protobufQueryPlan: Array[Byte]): Unit = synchronized {
    val key = protobufQueryPlan.toSeq
    if (planMap.containsKey(key)) {
      nativeLib.releasePlan(planMap.get(key))
      planMap.remove(key)
    }
  }
}
0 commit comments