@@ -19,13 +19,18 @@ import play.api.libs.json.{JsValue, Json}
19
19
import play .api .libs .ws .JsonBodyReadables ._
20
20
import za .co .absa .hyperdrive .trigger .api .rest .utils .WSClientProvider
21
21
import za .co .absa .hyperdrive .trigger .configuration .application .SparkConfig
22
+ import za .co .absa .hyperdrive .trigger .models .enums .JobStatuses
22
23
import za .co .absa .hyperdrive .trigger .models .enums .JobStatuses ._
23
24
import za .co .absa .hyperdrive .trigger .models .{JobInstance , SparkInstanceParameters }
24
25
import za .co .absa .hyperdrive .trigger .scheduler .executors .spark .{FinalStatuses => YarnFinalStatuses }
25
26
27
+ import java .time .LocalDateTime
28
+ import java .time .temporal .ChronoUnit
26
29
import scala .concurrent .{ExecutionContext , Future }
27
30
28
31
object SparkExecutor {
32
+ private val ExtraSubmitTimeout = 60000 // extra margin in milliseconds, added on top of sparkConfig.yarn.submitTimeout when checking how long a job has been in Submitting status
33
+
29
34
def execute (
30
35
jobInstance : JobInstance ,
31
36
jobParameters : SparkInstanceParameters ,
@@ -50,6 +55,13 @@ object SparkExecutor {
50
55
}) match {
51
56
case Seq (first) =>
52
57
updateJob(jobInstance.copy(applicationId = Some (first.id), jobStatus = getStatus(first.finalStatus)))
58
+ case _
59
+ // This guard relies on every instance in a multi-instance deployment being configured with the same sparkYarnSink.submitTimeout value
60
+ if jobInstance.jobStatus == JobStatuses .Submitting && jobInstance.updated
61
+ .map(lastUpdated => ChronoUnit .MILLIS .between(lastUpdated, LocalDateTime .now()))
62
+ .exists(_ < sparkConfig.yarn.submitTimeout + ExtraSubmitTimeout ) =>
63
+ // Do nothing during the submit timeout period, to avoid two parallel job submissions/executions
64
+ Future ((): Unit )
53
65
case _ => sparkClusterService.handleMissingYarnStatus(jobInstance, updateJob)
54
66
}
55
67
}
0 commit comments