This repository has been archived by the owner on Apr 2, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
auroraAPI.thrift
1308 lines (1093 loc) · 40 KB
/
auroraAPI.thrift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace java org.apache.aurora.gen
namespace py gen.apache.aurora.api
// Thrift interface definition for the aurora scheduler.
// TODO(wfarner): It would be nice if we could put some HTML tags here, regex doesn't handle it though.
/** The result of an API operation. A result may only be specified when this is OK. */
enum ResponseCode {
INVALID_REQUEST = 0,
OK = 1,
ERROR = 2,
WARNING = 3,
AUTH_FAILED = 4,
/** Raised when an operation was unable to proceed due to an in-progress job update. */
JOB_UPDATING_ERROR = 5,
/** Raised when a scheduler is transiently unavailable and later retry is recommended. */
ERROR_TRANSIENT = 6
}
/** Aurora executor framework name. */
const string AURORA_EXECUTOR_NAME = 'AuroraExecutor'
// TODO(maxim): Remove in 0.7.0. (AURORA-749)
/** Wraps a single user name; scheduled for removal per the TODO above. */
struct Identity {
/** User name. */
2: string user
}
/** A single host attribute. */
struct Attribute {
/** Name of the attribute. */
1: string name
/** Values held by the attribute. */
2: set<string> values
}
/** Maintenance modes, as used by the `mode` fields of HostAttributes and HostStatus. */
enum MaintenanceMode {
NONE = 1,
SCHEDULED = 2,
DRAINING = 3,
DRAINED = 4
}
/** The attributes assigned to a host. */
struct HostAttributes {
/** Host the attributes are assigned to. */
1: string host
/** Attributes assigned to the host. */
2: set<Attribute> attributes
/** Maintenance mode of the host, when set. */
3: optional MaintenanceMode mode
/** Slave ID, when set. NOTE(review): presumably the Mesos slave ID of this host - confirm. */
4: optional string slaveId
}
/**
* A constraint that specifies an explicit set of values, at least one of which must be present
* on a host for a task to be scheduled there.
*/
struct ValueConstraint {
/** If true, treat this as a 'not' - to avoid specific values. */
1: bool negated
/** The explicit set of values to match (or to avoid, when negated is true). */
2: set<string> values
}
/**
* A constraint that specifies the maximum number of active tasks on a host with a matching
* attribute that may be scheduled simultaneously.
*/
struct LimitConstraint {
/** The maximum number of tasks described above. */
1: i32 limit
}
/** Types of constraints that may be applied to a task. */
union TaskConstraint {
/** Constraint on an explicit set of values. */
1: ValueConstraint value
/** Constraint on the number of tasks per host. */
2: LimitConstraint limit
}
/** A constraint that defines whether a task may be scheduled on a host. */
struct Constraint {
/** Mesos slave attribute that the constraint is matched against. */
1: string name
/** The value or limit constraint applied to the named attribute. */
2: TaskConstraint constraint
}
/** A package reference made up of a role, a name and an integer version. */
struct Package {
1: string role
2: string name
3: i32 version
}
/** Arbitrary key-value metadata to be included into TaskConfig. */
struct Metadata {
/** Metadata key. */
1: string key
/** Metadata value. */
2: string value
}
/** A unique identifier for a Job, made up of role, environment and name. */
struct JobKey {
/** User role (Unix service account), for example "mesos". */
1: string role
/** Environment, for example "devel". */
2: string environment
/** Name, for example "labrat". */
3: string name
}
// TODO(jly): Deprecated, remove in 0.21. See AURORA-1959.
/** A unique lock key. */
union LockKey {
/** Job key used as the lock key. */
1: JobKey job
}
// TODO(jly): Deprecated, remove in 0.21. See AURORA-1959.
/** A generic lock struct to facilitate context specific resource/operation serialization. */
struct Lock {
/** ID of the lock - unique per storage. */
1: LockKey key
/** UUID - facilitating soft lock authorization. */
2: string token
/** Lock creator. */
3: string user
/** Lock creation timestamp in milliseconds. */
4: i64 timestampMs
/** Optional message to record with the lock. */
5: optional string message
}
/** A unique identifier for the active task within a job: a job key plus an instance ID. */
struct InstanceKey {
/** Key identifying the job. */
1: JobKey jobKey
/** Unique instance ID for the active task in a job. */
2: i32 instanceId
}
/** URI which mirrors CommandInfo.URI in the Mesos Protobuf. */
struct MesosFetcherURI {
/** Where to get the resource from. */
1: string value
/** Extract compressed archive after downloading. */
2: optional bool extract
/** Cache value using Mesos Fetcher caching mechanism. */
3: optional bool cache
}
/** Names an executor and carries its configuration data. */
struct ExecutorConfig {
/** Name identifying the Executor. */
1: string name
/** Executor configuration data. */
2: string data
}
/** The mode for a volume mount. */
enum Mode {
  /** Read Write */
  RW = 1,
  /** Read Only */
  RO = 2
}
/** A volume mount point within a container. */
struct Volume {
/** The path inside the container where the mount will be created. */
1: string containerPath
/** The path on the host that will serve as the source for the mount. */
2: string hostPath
/** The access mode (see Mode). */
3: Mode mode
}
/** Describes an image for use with the Mesos unified containerizer in the Docker format. */
struct DockerImage {
/** The name of the image to run. */
1: string name
/** The Docker tag identifying the image. */
2: string tag
}
/** Describes an image for use with the Mesos unified containerizer in the AppC format. */
struct AppcImage {
/** The name of the image to run. */
1: string name
/** The appc image id identifying the image. */
2: string imageId
}
/** Describes an image to be used with the Mesos unified containerizer. */
union Image {
/** Image in the Docker format. */
1: DockerImage docker
/** Image in the AppC format. */
2: AppcImage appc
}
/** Describes a Mesos container; this is the default container type. */
struct MesosContainer {
/** The optional filesystem image to use when launching this task. */
1: optional Image image
/** The optional list of volumes to mount into the task. */
2: optional list<Volume> volumes
}
/** Describes a parameter passed to the docker CLI. */
struct DockerParameter {
/** A parameter to pass to docker (e.g. volume). */
1: string name
/** The value to pass to a parameter (e.g. /src/webapp:/opt/webapp). */
2: string value
}
/** Describes a docker container. */
struct DockerContainer {
/** The container image to be run. */
1: string image
/** The arbitrary parameters to pass to the container. */
2: optional list<DockerParameter> parameters
}
/** Describes a container to be used in a task. */
union Container {
/** A Mesos container. */
1: MesosContainer mesos
/** A docker container. */
2: DockerContainer docker
}
/** Describes a resource value required to run a task. Exactly one member is set (union). */
union Resource {
/** Number of CPUs. */
1: double numCpus
/** Amount of RAM. NOTE(review): unit taken from the field name (MB) - confirm. */
2: i64 ramMb
/** Amount of disk. NOTE(review): unit taken from the field name (MB) - confirm. */
3: i64 diskMb
/** Name of a port. */
4: string namedPort
/** Number of GPUs. */
5: i64 numGpus
}
/** Policy for how to deal with task partitions (see TaskConfig.partitionPolicy). */
struct PartitionPolicy {
/** NOTE(review): presumably whether a partitioned task gets rescheduled - confirm. */
1: bool reschedule
/** NOTE(review): presumably the delay, in seconds, before acting on a partition - confirm. */
2: optional i64 delaySecs
}
/** SLA requirements expressed as the percentage of instances to be RUNNING every durationSecs. */
struct PercentageSlaPolicy {
/** The percentage of active instances required every `durationSecs`. */
1: double percentage
/** Minimum time duration a task needs to be `RUNNING` to be treated as active. */
2: i64 durationSecs
}
/** SLA requirements expressed as the number of instances to be RUNNING every durationSecs. */
struct CountSlaPolicy {
/** The number of active instances required every `durationSecs`. */
1: i64 count
/** Minimum time duration a task needs to be `RUNNING` to be treated as active. */
2: i64 durationSecs
}
/** SLA requirements to be delegated to an external coordinator. */
struct CoordinatorSlaPolicy {
/** URL for the coordinator service that needs to be contacted for SLA checks. */
1: string coordinatorUrl
/** Field in the Coordinator response JSON indicating if the action is allowed or not. */
2: string statusKey
}
/** SLA requirements expressed as exactly one of the policy types below. */
union SlaPolicy {
/** Percentage-based SLA policy. */
1: PercentageSlaPolicy percentageSlaPolicy
/** Count-based SLA policy. */
2: CountSlaPolicy countSlaPolicy
/** SLA policy delegated to an external coordinator. */
3: CoordinatorSlaPolicy coordinatorSlaPolicy
}
/**
* Description of the tasks contained within a job.
* Field ids are not sequential and declaration order is significant: `container` is kept last on
* purpose (see the comment above that field).
*/
struct TaskConfig {
/** Job task belongs to. */
28: JobKey job
// TODO(maxim): Deprecated. See AURORA-749.
/** Contains the role component of JobKey. */
17: Identity owner
/** Whether the task is a service. */
7: bool isService
/** Priority of the task. */
11: i32 priority
/** NOTE(review): presumably the number of failures tolerated before giving up - confirm. */
13: i32 maxTaskFailures
// TODO(mnurolahzade): Deprecated. See AURORA-1708.
/** Whether this is a production task, which can preempt. */
18: optional bool production
/** Task tier type. */
30: optional string tier
/** All resources required to run a task. */
32: set<Resource> resources
/** Scheduling constraints for the task (see Constraint). */
20: set<Constraint> constraints
/** Resources to retrieve with Mesos Fetcher. */
33: optional set<MesosFetcherURI> mesosFetcherUris
/**
* Custom links to include when displaying this task on the scheduler dashboard. Keys are anchor
* text, values are URLs. Wildcards are supported for dynamic link crafting based on host, ports,
* instance, etc.
*/
22: optional map<string, string> taskLinks
/** Contact email address, when set. */
23: optional string contactEmail
/** Executor configuration. */
25: optional ExecutorConfig executorConfig
/** Used to display additional details in the UI. */
27: optional set<Metadata> metadata
/** Policy for how to deal with task partitions. */
34: optional PartitionPolicy partitionPolicy
/** SLA requirements to be met during maintenance. */
35: optional SlaPolicy slaPolicy
// This field is deliberately placed at the end to work around a bug in the immutable wrapper
// code generator. See AURORA-1185 for details.
/** The container the task should use to execute. Defaults to an empty Mesos container. */
29: Container container = { "mesos": {} }
}
/** An aggregate of resource values. Its only field uses id 4. */
struct ResourceAggregate {
/** Aggregated resource values. */
4: set<Resource> resources
}
/** Defines the policy for launching a new cron job when one is already running. */
enum CronCollisionPolicy {
/** Kills the existing job with the colliding name, and runs the new cron job. */
KILL_EXISTING = 0,
/** Cancels execution of the new job, leaving the running job intact. */
CANCEL_NEW = 1,
/**
* DEPRECATED. For existing jobs, treated the same as CANCEL_NEW.
* createJob will reject jobs with this policy.
*/
RUN_OVERLAP = 2
}
/** Description of an Aurora job. One task will be scheduled for each instance within the job. */
struct JobConfiguration {
/**
* Key for this job. If not specified, name, owner.role, and a reasonable default environment are
* used to construct it server-side.
*/
9: JobKey key
// TODO(maxim): Deprecated. See AURORA-749.
/** Owner of this job. */
7: Identity owner
/** If present, the job will be handled as a cron job with this crontab-syntax schedule. */
4: optional string cronSchedule
/** Collision policy to use when handling overlapping cron runs. Default is KILL_EXISTING. */
5: CronCollisionPolicy cronCollisionPolicy
/** Task configuration for this job. */
6: TaskConfig taskConfig
/**
* The number of instances in the job. Generated instance IDs for tasks will be in the range
* [0, instances).
*/
8: i32 instanceCount
}
/** Per-state task counts for a job. */
struct JobStats {
/** Number of tasks in active state for this job. */
1: i32 activeTaskCount
/** Number of tasks in finished state for this job. */
2: i32 finishedTaskCount
/** Number of failed tasks for this job. */
3: i32 failedTaskCount
/** Number of tasks in pending state for this job. */
4: i32 pendingTaskCount
}
/** A job configuration together with its task statistics. */
struct JobSummary {
/** Configuration of the job. */
1: JobConfiguration job
/** Task counts for the job. */
2: JobStats stats
/** Timestamp of next cron run in ms since epoch, for a cron job. */
3: optional i64 nextCronRunMs
}
/** Closed range of integers, i.e. both ends are included. */
struct Range {
/** First value in the range. */
1: i32 first
/** Last value in the range. */
2: i32 last
}
/** A task config grouped with a set of instance ranges. Field id 2 is not used. */
struct ConfigGroup {
/** The task config. */
1: TaskConfig config
/** NOTE(review): presumably the instance ID ranges that use `config` - confirm. */
3: set<Range> instances
}
/** A job key together with a set of config groups. */
struct ConfigSummary {
/** Key of the job. */
1: JobKey key
/** Config groups for the job. */
2: set<ConfigGroup> groups
}
/** Result wrapping a single TaskConfig. Its only field uses id 2. */
struct PopulateJobResult {
2: TaskConfig taskConfig
}
/** Allocated quota plus optional consumption figures split by production status and pool. */
struct GetQuotaResult {
/** Total allocated resource quota. */
1: ResourceAggregate quota
/** Resources consumed by production jobs from a shared resource pool. */
2: optional ResourceAggregate prodSharedConsumption
/** Resources consumed by non-production jobs from a shared resource pool. */
3: optional ResourceAggregate nonProdSharedConsumption
/** Resources consumed by production jobs from a dedicated resource pool. */
4: optional ResourceAggregate prodDedicatedConsumption
/** Resources consumed by non-production jobs from a dedicated resource pool. */
5: optional ResourceAggregate nonProdDedicatedConsumption
}
/** States that a task may be in. Numeric values are not in declaration order. */
enum ScheduleStatus {
// TODO(maxim): This state does not add much value. Consider dropping it completely.
/** Initial state for a task. A task will remain in this state until it has been persisted. */
INIT = 11,
/** The task will be rescheduled, but is being throttled for restarting too frequently. */
THROTTLED = 16,
/** Task is awaiting assignment to a slave. */
PENDING = 0,
/** Task has been assigned to a slave. */
ASSIGNED = 9,
/** Slave has acknowledged receipt of task and is bootstrapping the task. */
STARTING = 1,
/** The task is running on the slave. */
RUNNING = 2,
/** The task terminated with an exit code of zero. */
FINISHED = 3,
/** The task is being preempted by another task. */
PREEMPTING = 13,
/** The task is being restarted in response to a user request. */
RESTARTING = 12,
/** The task is being restarted in response to a host maintenance request. */
DRAINING = 17,
/** The task terminated with a non-zero exit code. */
FAILED = 4,
/** Execution of the task was terminated by the system. */
KILLED = 5,
/** The task is being forcibly killed. */
KILLING = 6,
/**
* A fault in the task environment has caused the system to believe the task no longer exists.
* This can happen, for example, when a slave process disappears.
*/
LOST = 7,
/** The task is currently partitioned and in an unknown state. */
PARTITIONED = 18
}
/** States that a task may be in while still considered active. */
const set<ScheduleStatus> ACTIVE_STATES = [ScheduleStatus.ASSIGNED,
                                           ScheduleStatus.DRAINING,
                                           ScheduleStatus.KILLING,
                                           ScheduleStatus.PENDING,
                                           ScheduleStatus.PREEMPTING,
                                           ScheduleStatus.RESTARTING,
                                           ScheduleStatus.RUNNING,
                                           ScheduleStatus.STARTING,
                                           ScheduleStatus.PARTITIONED,
                                           ScheduleStatus.THROTTLED]
/** States that a task may be in while associated with a slave machine and non-terminal. */
const set<ScheduleStatus> SLAVE_ASSIGNED_STATES = [ScheduleStatus.ASSIGNED,
ScheduleStatus.DRAINING,
ScheduleStatus.KILLING,
ScheduleStatus.PREEMPTING,
ScheduleStatus.RESTARTING,
ScheduleStatus.RUNNING,
ScheduleStatus.PARTITIONED,
ScheduleStatus.STARTING]
/** States that a task may be in while in an active sandbox. */
const set<ScheduleStatus> LIVE_STATES = [ScheduleStatus.KILLING,
ScheduleStatus.PREEMPTING,
ScheduleStatus.RESTARTING,
ScheduleStatus.DRAINING,
ScheduleStatus.PARTITIONED,
ScheduleStatus.RUNNING]
/** States a completed task may be in. */
const set<ScheduleStatus> TERMINAL_STATES = [ScheduleStatus.FAILED,
ScheduleStatus.FINISHED,
ScheduleStatus.KILLED,
ScheduleStatus.LOST]
// Regular expressions for matching valid identifiers for job path components. All expressions
// below should accept and reject the same set of inputs.
/** Base pattern: one or more word characters, hyphens or dots, anchored at both ends. */
const string GOOD_IDENTIFIER_PATTERN = "^[\\w\\-\\.]+$"
/** JVM: Use with java.util.regex.Pattern#compile. Same value as GOOD_IDENTIFIER_PATTERN. */
const string GOOD_IDENTIFIER_PATTERN_JVM = GOOD_IDENTIFIER_PATTERN
/** Python: Use with re.compile. Same value as GOOD_IDENTIFIER_PATTERN. */
const string GOOD_IDENTIFIER_PATTERN_PYTHON = GOOD_IDENTIFIER_PATTERN
/** Event marking a state transition within a task's lifecycle. */
struct TaskEvent {
/** Epoch timestamp in milliseconds. */
1: i64 timestamp
/** New status of the task. */
2: ScheduleStatus status
/** Audit message that explains why a transition occurred, when provided. */
3: optional string message
/** Hostname of the scheduler machine that performed the event, when provided. */
4: optional string scheduler
}
/** A task assignment that is provided to an executor. */
struct AssignedTask {
/** The mesos task ID for this task. Guaranteed to be globally unique. */
1: string taskId
/**
* The mesos slave ID that this task has been assigned to.
* This will not be populated for a PENDING task.
*/
2: string slaveId
/**
* The name of the machine that this task has been assigned to.
* This will not be populated for a PENDING task.
*/
3: string slaveHost
/** Information about how to run this task. */
4: TaskConfig task
/** Ports reserved on the machine while this task is running, as a string-to-i32 map. */
5: map<string, i32> assignedPorts
/**
* The instance ID assigned to this task. Instance IDs must be unique and contiguous within a
* job, and will be in the range [0, N-1] (inclusive) for a job that has N instances.
*/
6: i32 instanceId
}
/** A task that has been scheduled. Field ids are not in declaration order (6 precedes 4). */
struct ScheduledTask {
/** The task that was scheduled. */
1: AssignedTask assignedTask
/** The current status of this task. */
2: ScheduleStatus status
/**
* The number of failures that this task has accumulated over the multi-generational history of
* this task.
*/
3: i32 failureCount
/** The number of partitions this task has accumulated over its lifetime. */
6: i32 timesPartitioned
/** State change history for this task. */
4: list<TaskEvent> taskEvents
/**
* The task ID of the previous generation of this task. When a task is automatically rescheduled,
* a copy of the task is created and the ancestor ID of the copy is set to the previous task's
* task ID.
*/
5: string ancestorId
}
/** Result wrapping a list of scheduled tasks. */
struct ScheduleStatusResult {
1: list<ScheduledTask> tasks
}
/** Result wrapping a set of job configurations. */
struct GetJobsResult {
1: set<JobConfiguration> configs
}
/**
* Contains a set of restrictions on matching tasks where all restrictions must be met
* (terms are AND'ed together). Every field is optional.
*/
struct TaskQuery {
/** Role to match. */
14: optional string role
/** Environment to match. */
9: optional string environment
/** Job name to match. */
2: optional string jobName
/** Task IDs to match. */
4: optional set<string> taskIds
/** Task statuses to match. */
5: optional set<ScheduleStatus> statuses
/** Instance IDs to match. */
7: optional set<i32> instanceIds
/** Slave hosts to match. */
10: optional set<string> slaveHosts
/** Job keys to match. */
11: optional set<JobKey> jobKeys
/** NOTE(review): presumably a pagination offset, as in JobUpdateQuery.offset - confirm. */
12: optional i32 offset
/** NOTE(review): presumably a cap on the number of results, as in JobUpdateQuery.limit - confirm. */
13: optional i32 limit
}
/** A host paired with its maintenance mode. */
struct HostStatus {
/** Name of the host. */
1: string host
/** Maintenance mode of the host. */
2: MaintenanceMode mode
}
/** Job counts for a role. */
struct RoleSummary {
/** Name of the role. */
1: string role
/** Number of jobs for the role. */
2: i32 jobCount
/** Number of cron jobs for the role. */
3: i32 cronJobCount
}
/** A set of host names. */
struct Hosts {
1: set<string> hostNames
}
/** A task ID paired with a reason string. */
struct PendingReason {
/** ID of the task. */
1: string taskId
/** NOTE(review): presumably why the task is still pending - confirm. */
2: string reason
}
/**
* States that a job update may be in.
* ACTIVE_JOB_UPDATE_STATES lists the subset considered active.
*/
enum JobUpdateStatus {
/** Update is in progress. */
ROLLING_FORWARD = 0,
/** Update has failed and is being rolled back. */
ROLLING_BACK = 1,
/** Update has been paused while in progress. */
ROLL_FORWARD_PAUSED = 2,
/** Update has been paused during rollback. */
ROLL_BACK_PAUSED = 3,
/** Update has completed successfully. */
ROLLED_FORWARD = 4,
/** Update has failed and rolled back. */
ROLLED_BACK = 5,
/** Update was aborted. */
ABORTED = 6,
/** Unknown error during update. */
ERROR = 7,
/**
* Update failed to complete.
* This can happen if failure thresholds are met while rolling forward, but rollback is disabled,
* or if failure thresholds are met when rolling back.
*/
FAILED = 8,
/** Update has been blocked while in progress due to missing/expired pulse. */
ROLL_FORWARD_AWAITING_PULSE = 9,
/** Update has been blocked during rollback due to missing/expired pulse. */
ROLL_BACK_AWAITING_PULSE = 10
}
/**
* States the job update can be in while still considered active: rolling, paused, or awaiting a
* pulse, in either direction.
*/
const set<JobUpdateStatus> ACTIVE_JOB_UPDATE_STATES = [JobUpdateStatus.ROLLING_FORWARD,
JobUpdateStatus.ROLLING_BACK,
JobUpdateStatus.ROLL_FORWARD_PAUSED,
JobUpdateStatus.ROLL_BACK_PAUSED,
JobUpdateStatus.ROLL_FORWARD_AWAITING_PULSE,
JobUpdateStatus.ROLL_BACK_AWAITING_PULSE]
/**
* States the job update can be in while waiting for a pulse.
* NOTE(review): the constant name is misspelled ("AWAITNG"); it is left unchanged here because
* renaming it would change the identifier seen by code generated from this file.
*/
const set<JobUpdateStatus> AWAITNG_PULSE_JOB_UPDATE_STATES = [JobUpdateStatus.ROLL_FORWARD_AWAITING_PULSE,
JobUpdateStatus.ROLL_BACK_AWAITING_PULSE]
/** Job update actions that can be applied to job instances. */
enum JobUpdateAction {
/**
* An instance was moved to the target state successfully, and declared healthy if the desired
* state did not involve deleting the instance.
*/
INSTANCE_UPDATED = 1,
/**
* An instance was rolled back because the job update did not succeed. The instance was reverted
* to the original state prior to the job update, which means that the instance was removed if
* the update added instances to the job.
*/
INSTANCE_ROLLED_BACK = 2,
/** An instance is being moved from the original state to the desired state. */
INSTANCE_UPDATING = 3,
/**
* An instance is being moved from the desired state back to the original state, because the job
* update failed.
*/
INSTANCE_ROLLING_BACK = 4,
/** An instance update was attempted but failed and was not rolled back. */
INSTANCE_UPDATE_FAILED = 5,
/** An instance rollback was attempted but failed. */
INSTANCE_ROLLBACK_FAILED = 6
}
/** Status of the coordinated update. Intended as a response to pulseJobUpdate RPC. */
enum JobUpdatePulseStatus {
/** Update is active. See ACTIVE_JOB_UPDATE_STATES for statuses considered active. */
OK = 1,
// NOTE(review): TERMINAL_JOB_UPDATE_STATES is not defined in the part of the file reviewed
// here - confirm that it exists.
/**
* Update has reached terminal state. See TERMINAL_JOB_UPDATE_STATES for statuses
* considered terminal.
*/
FINISHED = 2
}
/** Job update key: the job being updated plus an update ID. */
struct JobUpdateKey {
/** Job being updated. */
1: JobKey job
/** Update ID. */
2: string id
}
/** Limits the amount of active changes being made to instances to groupSize. */
struct QueueJobUpdateStrategy {
/** Maximum number of instances with active changes at once. */
1: i32 groupSize
}
/**
* Similar to Queue strategy but will not start a new group until all instances in an active
* group have finished updating.
*/
struct BatchJobUpdateStrategy {
/** Number of instances in each group. */
1: i32 groupSize
/** Update will pause automatically after each batch completes. */
2: bool autopauseAfterBatch
}
/**
* Same as Batch strategy but each time an active group completes, the size of the next active
* group may change.
*/
struct VariableBatchJobUpdateStrategy {
/** Sizes of the groups. NOTE(review): presumably consumed in list order - confirm. */
1: list<i32> groupSizes
/** Update will pause automatically after each batch completes. */
2: bool autopauseAfterBatch
}
/** The strategy used by a job update; exactly one member is set (union). */
union JobUpdateStrategy {
/** Queue strategy. */
1: QueueJobUpdateStrategy queueStrategy
/** Batch strategy. */
2: BatchJobUpdateStrategy batchStrategy
/** Variable batch strategy. */
3: VariableBatchJobUpdateStrategy varBatchStrategy
}
/** Job update thresholds and limits. */
struct JobUpdateSettings {
/**
* Deprecated, please set value inside of desired update strategy instead.
* Max number of instances being updated at any given moment.
*/
1: i32 updateGroupSize
/** Max number of instance failures to tolerate before marking instance as FAILED. */
2: i32 maxPerInstanceFailures
/** Max number of FAILED instances to tolerate before terminating the update. */
3: i32 maxFailedInstances
/** Min time to watch a RUNNING instance, in milliseconds. */
5: i32 minWaitInInstanceRunningMs
/** If true, enables failed update rollback. */
6: bool rollbackOnFailure
/** Instance IDs to act on. All instances will be affected if this is not set. */
7: set<Range> updateOnlyTheseInstances
/**
* Deprecated, please set updateStrategy to the Batch strategy instead.
* If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another
* batch until the preceding batch finishes updating.
*/
8: bool waitForBatchCompletion
/**
* If set, requires external calls to pulseJobUpdate RPC within the specified rate for the
* update to make progress. If no pulses are received within the specified interval the update
* will block. A blocked update is unable to continue but retains its current status. It may only
* get unblocked by a fresh pulseJobUpdate call.
*/
9: optional i32 blockIfNoPulsesAfterMs
/**
* If true, updates will obey the SLA requirements of the tasks being updated. If the SLA policy
* differs between the old and new task configurations, updates will use the newest configuration.
*/
10: optional bool slaAware
/** Update strategy to be used for the update. See JobUpdateStrategy for choices. */
11: optional JobUpdateStrategy updateStrategy
}
/** Event marking a state transition in the job update lifecycle. */
struct JobUpdateEvent {
/** Update status. */
1: JobUpdateStatus status
/** Epoch timestamp in milliseconds. */
2: i64 timestampMs
/** User who performed this event (if user-initiated). */
3: optional string user
/**
* Message from the user (for user-initiated transitions) or the scheduler about why the state was
* changed.
*/
4: optional string message
}
/** Event marking a state transition in the job instance update lifecycle. */
struct JobInstanceUpdateEvent {
/** Job instance ID. */
1: i32 instanceId
/** Epoch timestamp in milliseconds. */
2: i64 timestampMs
/** Job update action taken on the instance. */
3: JobUpdateAction action
/** Optional message explaining the instance update event. */
4: optional string message
}
/** Maps a set of instance ID ranges to the TaskConfig associated with them. */
struct InstanceTaskConfig {
/** A TaskConfig associated with instances. */
1: TaskConfig task
/** Instances associated with the TaskConfig. */
2: set<Range> instances
}
/** Current job update state, including status and created/modified timestamps. */
struct JobUpdateState {
/** Current status of the update. */
1: JobUpdateStatus status
/** Created timestamp in milliseconds. */
2: i64 createdTimestampMs
/** Last modified timestamp in milliseconds. */
3: i64 lastModifiedTimestampMs
}
/** Summary of the job update, including job key, user and current state. */
struct JobUpdateSummary {
/** Unique identifier for the update. */
5: JobUpdateKey key
/** User who initiated the update. */
3: string user
/** Current job update state. */
4: JobUpdateState state
/** Update metadata supplied by the client. */
6: optional set<Metadata> metadata
}
/** Update configuration and setting details. */
struct JobUpdateInstructions {
/** Actual InstanceId -> TaskConfig mapping when the update was requested. */
1: set<InstanceTaskConfig> initialState
/** Desired configuration when the update completes. */
2: InstanceTaskConfig desiredState
/** Update-specific settings. */
3: JobUpdateSettings settings
}
/** Full definition of the job update: its summary plus its instructions. */
struct JobUpdate {
/** Update summary. */
1: JobUpdateSummary summary
/** Update configuration. */
2: JobUpdateInstructions instructions
}
/** A job update definition together with its update-level and instance-level event history. */
struct JobUpdateDetails {
/** Update definition. */
1: JobUpdate update
/** History for this update. */
2: list<JobUpdateEvent> updateEvents
/** History for the individual instances updated. */
3: list<JobInstanceUpdateEvent> instanceEvents
}
/** A request to update the following instances of an existing job. Used by startUpdate. */
struct JobUpdateRequest {
/** Desired TaskConfig to apply. */
1: TaskConfig taskConfig
/** Desired number of instances of the task config. */
2: i32 instanceCount
/** Update settings and limits. */
3: JobUpdateSettings settings
/** Update metadata supplied by the client issuing the JobUpdateRequest. */
4: optional set<Metadata> metadata
}
/**
* Contains a set of restrictions on matching job updates where all restrictions must be met
* (terms are AND'ed together).
*/
struct JobUpdateQuery {
/** Job role. */
2: optional string role
/** Unique identifier for a job update. */
8: optional JobUpdateKey key
/** Job key. */
3: optional JobKey jobKey
/** User who created the update. */
4: optional string user
/** Set of update statuses. */
5: optional set<JobUpdateStatus> updateStatuses
/** Offset to serve data from. Used by pagination. */
6: i32 offset
/** Number of records to serve. Used by pagination. */
7: i32 limit
}
/** A maintenance request for a single host. */
struct HostMaintenanceRequest {
/** Host the request applies to. */
1: string host
/** NOTE(review): presumably the SLA policy used for tasks that define none - confirm. */
2: SlaPolicy defaultSlaPolicy
/** Timeout in seconds. NOTE(review): what happens on expiry is not stated here - confirm. */
3: i64 timeoutSecs
/** Created timestamp in milliseconds. */
4: i64 createdTimestampMs
}
/** Result wrapping a set of backups, each given as a string. */
struct ListBackupsResult {
1: set<string> backups
}
/** Result wrapping a set of host statuses. */
struct StartMaintenanceResult {
1: set<HostStatus> statuses
}
/** Result wrapping a set of host statuses. */
struct DrainHostsResult {
1: set<HostStatus> statuses
}
/** Result wrapping a set of scheduled tasks. */
struct QueryRecoveryResult {
1: set<ScheduledTask> tasks
}
/** Result wrapping a set of host statuses. */
struct MaintenanceStatusResult {
1: set<HostStatus> statuses
}
/** Result wrapping a set of host statuses. */
struct EndMaintenanceResult {
1: set<HostStatus> statuses
}
/** Result wrapping a set of role summaries. */
struct RoleSummaryResult {
1: set<RoleSummary> summaries
}
/** Result wrapping a set of job summaries. */
struct JobSummaryResult {
1: set<JobSummary> summaries
}
/** Result wrapping a single config summary. */
struct ConfigSummaryResult {
1: ConfigSummary summary
}
/** Result wrapping a set of pending reasons. */
struct GetPendingReasonResult {
1: set<PendingReason> reasons
}
/** Result of the startUpdate call. */
struct StartJobUpdateResult {
/** Unique identifier for the job update. */
1: JobUpdateKey key
/** Summary of the update that is in progress for the given JobKey, when set. */
2: optional JobUpdateSummary updateSummary
}
/** Result of the getJobUpdateSummaries call. */
struct GetJobUpdateSummariesResult {
/** The job update summaries, as a list. */
1: list<JobUpdateSummary> updateSummaries
}
/** Result of the getJobUpdateDetails call. */
struct GetJobUpdateDetailsResult {
// TODO(zmanji): Remove this once we complete AURORA-1765