@@ -49,16 +49,18 @@ import (
49
49
)
50
50
51
51
const (
52
- namespace = "default"
53
- batchJobName = "test-job"
54
- objectiveMetric = "accuracy"
55
- timeout = time .Second * 10
52
+ namespace = "default" // namespace of the Trial and BatchJob keys built in this file
53
+ succeededBatchJobName = "test-job-succeeded" // BatchJob name for Trials outside the failure subtest (e.g. the "Complete" BatchJob case)
54
+ failedBatchJobName = "test-job-failed" // BatchJob name for the "Failed" BatchJob subtest; NOTE(review): presumably kept distinct because envtest never garbage-collects the Job — confirm
55
+ objectiveMetric = "accuracy"
56
+ timeout = time .Second * 10 // upper bound handed to the gomega Eventually polls
56
57
)
57
58
58
59
var (
59
60
startTime = time .Now ()
60
61
completionTime = time .Now ().Add (time .Second )
61
- batchJobKey = types.NamespacedName {Name : batchJobName , Namespace : namespace }
62
+ succeededBatchJobKey = types.NamespacedName {Name : succeededBatchJobName , Namespace : namespace }
63
+ failedBatchJobKey = types.NamespacedName {Name : failedBatchJobName , Namespace : namespace }
62
64
observationLogAvailable = & api_pb.GetObservationLogReply {
63
65
ObservationLog : & api_pb.ObservationLog {
64
66
MetricLogs : []* api_pb.MetricLog {
@@ -182,6 +184,67 @@ func TestReconcileBatchJob(t *testing.T) {
182
184
g .Expect (mgr .Start (mgrCtx )).NotTo (gomega .HaveOccurred ())
183
185
}()
184
186
187
+ t .Run (`Trial run with "Failed" BatchJob.` , func (t * testing.T ) {
188
+ g := gomega .NewGomegaWithT (t )
189
+ mockManagerClient .EXPECT ().DeleteTrialObservationLog (gomock .Any ()).Return (nil , nil )
190
+
191
+ trial := newFakeTrialBatchJob (commonv1beta1 .StdOutCollector , "test-failed-batch-job" , failedBatchJobName )
192
+ trialKey := types.NamespacedName {Name : "test-failed-batch-job" , Namespace : namespace }
193
+ batchJob := & batchv1.Job {}
194
+
195
+ // Create the Trial with StdOut MC
196
+ g .Expect (c .Create (ctx , trial )).NotTo (gomega .HaveOccurred ())
197
+
198
+ // Expect that BatchJob with appropriate name is created
199
+ g .Eventually (func (g gomega.Gomega ) {
200
+ g .Expect (c .Get (ctx , failedBatchJobKey , batchJob )).Should (gomega .Succeed ())
201
+ }, timeout ).Should (gomega .Succeed ())
202
+
203
+ // Expect that Trial status is running
204
+ g .Eventually (func (g gomega.Gomega ) {
205
+ g .Expect (c .Get (ctx , trialKey , trial )).Should (gomega .Succeed ())
206
+ g .Expect (trial .IsRunning ()).Should (gomega .BeTrue ())
207
+ }, timeout ).Should (gomega .Succeed ())
208
+
209
+ // Manually update BatchJob status to failed
210
+ // Expect that Trial status is failed
211
+ batchJobFailedMessage := "BatchJob completed test message"
212
+ batchJobFailedReason := "BatchJob completed test reason"
213
+ g .Eventually (func (g gomega.Gomega ) {
214
+ g .Expect (c .Get (ctx , failedBatchJobKey , batchJob )).Should (gomega .Succeed ())
215
+ batchJob .Status = batchv1.JobStatus {
216
+ Conditions : []batchv1.JobCondition {
217
+ {
218
+ Type : batchv1 .JobFailureTarget ,
219
+ Status : corev1 .ConditionTrue ,
220
+ Message : batchJobFailedMessage ,
221
+ Reason : batchJobFailedReason ,
222
+ },
223
+ {
224
+ Type : batchv1 .JobFailed ,
225
+ Status : corev1 .ConditionTrue ,
226
+ Message : batchJobFailedMessage ,
227
+ Reason : batchJobFailedReason ,
228
+ },
229
+ },
230
+ StartTime : & metav1.Time {Time : startTime },
231
+ }
232
+ g .Expect (c .Status ().Update (ctx , batchJob )).Should (gomega .Succeed ())
233
+ g .Expect (c .Get (ctx , trialKey , trial )).Should (gomega .Succeed ())
234
+ g .Expect (trial .IsFailed ()).Should (gomega .BeTrue ())
235
+ }, timeout ).Should (gomega .Succeed ())
236
+
237
+ // Delete the Trial
238
+ g .Expect (c .Delete (ctx , trial )).NotTo (gomega .HaveOccurred ())
239
+
240
+ // Expect that Trial is deleted
241
+ // The BatchJob can't be deleted because GC doesn't work in envtest, so the BatchJob gets stuck in the termination phase.
242
+ // Ref: https://book.kubebuilder.io/reference/testing/envtest.html#testing-considerations.
243
+ g .Eventually (func (g gomega.Gomega ) {
244
+ g .Expect (errors .IsNotFound (c .Get (ctx , trialKey , & trialsv1beta1.Trial {}))).Should (gomega .BeTrue ())
245
+ }, timeout ).Should (gomega .Succeed ())
246
+ })
247
+
185
248
t .Run (`Trial with "Complete" BatchJob and Available metrics.` , func (t * testing.T ) {
186
249
g := gomega .NewGomegaWithT (t )
187
250
gomock .InOrder (
@@ -190,20 +253,20 @@ func TestReconcileBatchJob(t *testing.T) {
190
253
)
191
254
192
255
// Create the Trial with StdOut MC
193
- trial := newFakeTrialBatchJob (commonv1beta1 .StdOutCollector , "test-available-stdout" )
256
+ trial := newFakeTrialBatchJob (commonv1beta1 .StdOutCollector , "test-available-stdout" , succeededBatchJobName )
194
257
trialKey := types.NamespacedName {Name : "test-available-stdout" , Namespace : namespace }
195
258
batchJob := & batchv1.Job {}
196
259
g .Expect (c .Create (ctx , trial )).NotTo (gomega .HaveOccurred ())
197
260
198
261
// Expect that BatchJob with appropriate name is created
199
262
g .Eventually (func (g gomega.Gomega ) {
200
- g .Expect (c .Get (ctx , batchJobKey , batchJob )).Should (gomega .Succeed ())
263
+ g .Expect (c .Get (ctx , succeededBatchJobKey , batchJob )).Should (gomega .Succeed ())
201
264
}, timeout ).Should (gomega .Succeed ())
202
265
203
266
// Update BatchJob status to Complete.
204
267
batchJobCompleteMessage := "BatchJob completed test message"
205
268
batchJobCompleteReason := "BatchJob completed test reason"
206
- g .Expect (c .Get (ctx , batchJobKey , batchJob )).NotTo (gomega .HaveOccurred ())
269
+ g .Expect (c .Get (ctx , succeededBatchJobKey , batchJob )).NotTo (gomega .HaveOccurred ())
207
270
batchJob .Status = batchv1.JobStatus {
208
271
Conditions : []batchv1.JobCondition {
209
272
{
@@ -254,7 +317,7 @@ func TestReconcileBatchJob(t *testing.T) {
254
317
mockManagerClient .EXPECT ().DeleteTrialObservationLog (gomock .Any ()).Return (nil , nil ),
255
318
)
256
319
// Create the Trial with StdOut MC
257
- trial := newFakeTrialBatchJob (commonv1beta1 .StdOutCollector , "test-unavailable-stdout" )
320
+ trial := newFakeTrialBatchJob (commonv1beta1 .StdOutCollector , "test-unavailable-stdout" , succeededBatchJobName )
258
321
trialKey := types.NamespacedName {Name : "test-unavailable-stdout" , Namespace : namespace }
259
322
g .Expect (c .Create (ctx , trial )).NotTo (gomega .HaveOccurred ())
260
323
@@ -295,7 +358,7 @@ func TestReconcileBatchJob(t *testing.T) {
295
358
mockManagerClient .EXPECT ().ReportTrialObservationLog (gomock .Any (), gomock .Any ()).Return (nil , nil ).AnyTimes ()
296
359
297
360
// Create the Trial with Push MC
298
- trial := newFakeTrialBatchJob (commonv1beta1 .PushCollector , "test-unavailable-push-failed-once" )
361
+ trial := newFakeTrialBatchJob (commonv1beta1 .PushCollector , "test-unavailable-push-failed-once" , succeededBatchJobName )
299
362
trialKey := types.NamespacedName {Name : "test-unavailable-push-failed-once" , Namespace : namespace }
300
363
g .Expect (c .Create (ctx , trial )).NotTo (gomega .HaveOccurred ())
301
364
@@ -322,73 +385,6 @@ func TestReconcileBatchJob(t *testing.T) {
322
385
}, timeout ).Should (gomega .Succeed ())
323
386
})
324
387
325
- t .Run (`Trial run with "Failed" BatchJob.` , func (t * testing.T ) {
326
- g := gomega .NewGomegaWithT (t )
327
- mockManagerClient .EXPECT ().DeleteTrialObservationLog (gomock .Any ()).Return (nil , nil )
328
-
329
- trial := newFakeTrialBatchJob (commonv1beta1 .StdOutCollector , "test-failed-batch-job" )
330
- trialKey := types.NamespacedName {Name : "test-failed-batch-job" , Namespace : namespace }
331
- batchJob := & batchv1.Job {}
332
-
333
- // Create the Trial with StdOut MC
334
- g .Expect (c .Create (ctx , trial )).NotTo (gomega .HaveOccurred ())
335
-
336
- // Expect that BatchJob with appropriate name is created
337
- g .Eventually (func (g gomega.Gomega ) {
338
- g .Expect (c .Get (ctx , batchJobKey , batchJob )).Should (gomega .Succeed ())
339
- }, timeout ).Should (gomega .Succeed ())
340
-
341
- // Expect that Trial status is running
342
- g .Eventually (func (g gomega.Gomega ) {
343
- g .Expect (c .Get (ctx , trialKey , trial )).Should (gomega .Succeed ())
344
- g .Expect (trial .IsRunning ()).Should (gomega .BeTrue ())
345
- }, timeout ).Should (gomega .Succeed ())
346
-
347
- // Manually update BatchJob status to failed
348
- // Expect that Trial status is failed
349
- batchJobFailedMessage := "BatchJob completed test message"
350
- batchJobFailedReason := "BatchJob completed test reason"
351
- g .Eventually (func (g gomega.Gomega ) {
352
- g .Expect (c .Get (ctx , batchJobKey , batchJob )).Should (gomega .Succeed ())
353
- batchJob .Status = batchv1.JobStatus {
354
- Conditions : []batchv1.JobCondition {
355
- {
356
- Type : batchv1 .JobFailureTarget ,
357
- Status : corev1 .ConditionTrue ,
358
- Message : batchJobFailedMessage ,
359
- Reason : batchJobFailedReason ,
360
- },
361
- {
362
- Type : batchv1 .JobFailed ,
363
- Status : corev1 .ConditionTrue ,
364
- Message : batchJobFailedMessage ,
365
- Reason : batchJobFailedReason ,
366
- },
367
- {
368
- Type : batchv1 .JobSuccessCriteriaMet ,
369
- Status : corev1 .ConditionFalse ,
370
- Message : batchJobFailedMessage ,
371
- Reason : batchJobFailedReason ,
372
- },
373
- },
374
- StartTime : & metav1.Time {Time : startTime },
375
- }
376
- g .Expect (c .Status ().Update (ctx , batchJob )).Should (gomega .Succeed ())
377
- g .Expect (c .Get (ctx , trialKey , trial )).Should (gomega .Succeed ())
378
- g .Expect (trial .IsFailed ()).Should (gomega .BeTrue ())
379
- }, timeout ).Should (gomega .Succeed ())
380
-
381
- // Delete the Trial
382
- g .Expect (c .Delete (ctx , trial )).NotTo (gomega .HaveOccurred ())
383
-
384
- // Expect that Trial is deleted
385
- // BatchJob can't be deleted because GC doesn't work in envtest and BatchJob stuck in termination phase.
386
- // Ref: https://book.kubebuilder.io/reference/testing/envtest.html#testing-considerations.
387
- g .Eventually (func (g gomega.Gomega ) {
388
- g .Expect (errors .IsNotFound (c .Get (ctx , trialKey , & trialsv1beta1.Trial {}))).Should (gomega .BeTrue ())
389
- }, timeout ).Should (gomega .Succeed ())
390
- })
391
-
392
388
t .Run ("Update status for empty Trial" , func (t * testing.T ) {
393
389
g := gomega .NewGomegaWithT (t )
394
390
g .Expect (r .updateStatus (& trialsv1beta1.Trial {})).To (gomega .HaveOccurred ())
@@ -454,7 +450,7 @@ func TestGetObjectiveMetricValue(t *testing.T) {
454
450
g .Expect (err ).To (gomega .HaveOccurred ())
455
451
}
456
452
457
- func newFakeTrialBatchJob (mcType commonv1beta1.CollectorKind , trialName string ) * trialsv1beta1.Trial {
453
+ func newFakeTrialBatchJob (mcType commonv1beta1.CollectorKind , trialName , jobName string ) * trialsv1beta1.Trial {
458
454
primaryContainer := "training-container"
459
455
460
456
job := & batchv1.Job {
@@ -463,7 +459,7 @@ func newFakeTrialBatchJob(mcType commonv1beta1.CollectorKind, trialName string)
463
459
Kind : "Job" ,
464
460
},
465
461
ObjectMeta : metav1.ObjectMeta {
466
- Name : batchJobName ,
462
+ Name : jobName ,
467
463
Namespace : namespace ,
468
464
},
469
465
Spec : batchv1.JobSpec {
0 commit comments