Skip to content

[FLINK-37515] Basic support for Blue/Green deployments #969

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
be8e899
[release] Update docs config for release branch
gyfora Feb 21, 2025
415ace4
[FLINK-37372] Fix infinite loop bug in savepoint error handling
gyfora Feb 24, 2025
01d2440
[FLINK-37370] [Observer] Finished batch jobs throw ReconciliationExce…
luca-p-castelli Feb 25, 2025
ba62f07
FLIP-503 first batch
schongloo Apr 8, 2025
eb5b24f
Addressing PR comments. Adding unit test to cover for Spec changes mi…
schongloo Apr 11, 2025
75d787e
Addressing PR comments/cleaning up comments.
schongloo Apr 11, 2025
8addc70
Adding a TransitionMode to the CRD/API for the future (currently only…
schongloo Apr 14, 2025
6ecc28c
Addressing the test sync issues that caused intermittent NullPointerE…
schongloo Apr 15, 2025
1df7747
Simplifying the retry/abort logic.
schongloo Apr 21, 2025
3690b0d
Addressing edge case restarting after the first deployment fails.
schongloo Apr 30, 2025
24f65e8
Undoing accidental test removal
schongloo May 5, 2025
18da43c
Defensive code vs. non-BASIC TransitionMode (only for FLIP-503)
schongloo May 5, 2025
7d5072a
Updated rbac.yaml
schongloo May 5, 2025
27ae567
Addressing PR comments
schongloo May 13, 2025
7bcb244
Fixing accidental comment
schongloo May 13, 2025
4834517
More PR comments addressed
schongloo May 13, 2025
4f4c39f
Clean up
schongloo May 13, 2025
f9245aa
Updated Blue/Green CRD
schongloo May 13, 2025
dcce6c8
Using the initial savepoint path, on first deployments only, if speci…
schongloo May 14, 2025
d4b31ea
Clean up
schongloo May 14, 2025
f04723f
Clean up
schongloo May 14, 2025
3698bb3
Clean up
schongloo May 14, 2025
d62260b
Addressing PR comments
schongloo May 16, 2025
f203cd0
Merging main (release 1.13)
schongloo Jul 1, 2025
ba6197a
Remove scale subresource from blue green deployments
schongloo Jul 1, 2025
3bbd618
Remove scale subresource from blue green deployments
schongloo Jul 1, 2025
0eac03b
Merge branch 'main' into release-1.11-bluegreen-flip503
schongloo Jul 1, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

baseURL = '//nightlies.apache.org/flink/flink-kubernetes-operator-docs-main'
baseURL = '//nightlies.apache.org/flink/flink-kubernetes-operator-docs-release-1.11'
languageCode = 'en-us'
title = 'Apache Flink Kubernetes Operator'
enableGitInfo = false
Expand All @@ -24,7 +24,7 @@ pygmentsUseClasses = true
[params]
# Flag whether this is a stable version or not.
# Used for the quickstart page.
IsStable = false
IsStable = true

# Flag to indicate whether an outdated warning should be shown.
ShowOutDatedWarning = false
Expand Down Expand Up @@ -53,13 +53,13 @@ pygmentsUseClasses = true

ProjectHomepage = "//flink.apache.org"

JavaDocs = "//nightlies.apache.org/flink/flink-kubernetes-operator-docs-main/api/java/"
JavaDocs = "//nightlies.apache.org/flink/flink-kubernetes-operator-docs-release-1.11/api/java/"

# External links at the bottom
# of the menu
MenuLinks = [
["Project Homepage", "//flink.apache.org"],
["JavaDocs", "//nightlies.apache.org/flink/flink-kubernetes-operator-docs-main/api/java/"]
["JavaDocs", "//nightlies.apache.org/flink/flink-kubernetes-operator-docs-release-1.11/api/java/"]
]

PreviousDocs = [
Expand Down
21 changes: 21 additions & 0 deletions flink-kubernetes-operator-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ under the License.
<include>flinkdeployments.flink.apache.org-v1.yml</include>
<include>flinksessionjobs.flink.apache.org-v1.yml</include>
<include>flinkstatesnapshots.flink.apache.org-v1.yml</include>
<include>flinkbluegreendeployments.flink.apache.org-v1.yml</include>
</includes>
<filtering>false</filtering>
</resource>
Expand All @@ -236,6 +237,8 @@ under the License.
<classpath refid="maven.compile.classpath"/>
<arg value="file://${rootDir}/helm/flink-kubernetes-operator/crds/flinkdeployments.flink.apache.org-v1.yml"/>
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.9.0/helm/flink-kubernetes-operator/crds/flinkdeployments.flink.apache.org-v1.yml"/>
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.10.0/helm/flink-kubernetes-operator/crds/flinkdeployments.flink.apache.org-v1.yml"/>
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.11.0/helm/flink-kubernetes-operator/crds/flinkdeployments.flink.apache.org-v1.yml"/>
</java>
</target>
</configuration>
Expand All @@ -253,6 +256,24 @@ under the License.
<classpath refid="maven.compile.classpath"/>
<arg value="file://${rootDir}/helm/flink-kubernetes-operator/crds/flinksessionjobs.flink.apache.org-v1.yml"/>
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.9.0/helm/flink-kubernetes-operator/crds/flinksessionjobs.flink.apache.org-v1.yml"/>
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.10.0/helm/flink-kubernetes-operator/crds/flinksessionjobs.flink.apache.org-v1.yml"/>
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.11.0/helm/flink-kubernetes-operator/crds/flinksessionjobs.flink.apache.org-v1.yml"/>
</java>
</target>
</configuration>
</execution>
<execution>
<id>flinkbgdeployments-remove-scale-subresource</id>
<phase>package</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<java classname="org.apache.flink.kubernetes.operator.api.utils.RemoveScaleSubResource"
fork="true" failonerror="true">
<classpath refid="maven.compile.classpath"/>
<arg value="${rootDir}/helm/flink-kubernetes-operator/crds/flinkbluegreendeployments.flink.apache.org-v1.yml"/>
</java>
</target>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.kubernetes.operator.api;

import org.apache.flink.annotation.Experimental;
import org.apache.flink.kubernetes.operator.api.spec.FlinkBlueGreenDeploymentSpec;
import org.apache.flink.kubernetes.operator.api.status.FlinkBlueGreenDeploymentStatus;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import io.fabric8.kubernetes.api.model.Namespaced;
import io.fabric8.kubernetes.client.CustomResource;
import io.fabric8.kubernetes.model.annotation.Group;
import io.fabric8.kubernetes.model.annotation.ShortNames;
import io.fabric8.kubernetes.model.annotation.Version;

/**
 * Custom resource that represents a Flink deployment with Blue/Green rollout capability.
 *
 * <p>The resource is namespaced, carries a {@link FlinkBlueGreenDeploymentSpec} as its spec and a
 * {@link FlinkBlueGreenDeploymentStatus} as its status, and is registered under {@link
 * CrdConstants#API_GROUP} / {@link CrdConstants#API_VERSION} with the short name {@code
 * flinkbgdep}. Properties that are {@code null} are omitted when the resource is serialized to
 * JSON.
 */
@Experimental
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonDeserialize()
@Group(CrdConstants.API_GROUP)
@Version(CrdConstants.API_VERSION)
@ShortNames({"flinkbgdep"})
public class FlinkBlueGreenDeployment
extends CustomResource<FlinkBlueGreenDeploymentSpec, FlinkBlueGreenDeploymentStatus>
implements Namespaced {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.kubernetes.operator.api.bluegreen;

import org.apache.flink.kubernetes.operator.api.FlinkDeployment;

/**
 * Enumeration of the two possible Flink Blue/Green deployment types. Only one of each type will be
 * present at all times for a particular job.
 */
public enum DeploymentType {
    /** Identifier for the first or "Blue" deployment type. */
    BLUE,

    /** Identifier for the second or "Green" deployment type. */
    GREEN;

    /** Key of the metadata label whose value is the name of one of the constants above. */
    public static final String LABEL_KEY = "flink/blue-green-deployment-type";

    /**
     * Resolves the deployment type from the {@link #LABEL_KEY} label of the given deployment.
     *
     * @param flinkDeployment deployment whose metadata labels are inspected
     * @return the constant whose name equals the label value
     * @throws NullPointerException if the deployment has no labels at all or does not carry the
     *     {@link #LABEL_KEY} label
     * @throws IllegalArgumentException if the label value is not the name of a constant
     */
    public static DeploymentType fromDeployment(FlinkDeployment flinkDeployment) {
        // The labels map may be unset on the resource; treat that like a missing label.
        String typeLabel =
                flinkDeployment.getMetadata().getLabels() == null
                        ? null
                        : flinkDeployment.getMetadata().getLabels().get(LABEL_KEY);

        if (typeLabel == null) {
            // Keep the exception type the unchecked lookup used to raise, but say what is wrong.
            throw new NullPointerException(
                    "FlinkDeployment '"
                            + flinkDeployment.getMetadata().getName()
                            + "' has no '"
                            + LABEL_KEY
                            + "' label");
        }
        return DeploymentType.valueOf(typeLabel);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.kubernetes.operator.api.spec;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;

/**
 * Configuration options to be used by the Flink Blue/Green Deployments.
 *
 * <p>Every option key declared here is namespaced under {@link #BLUE_GREEN_CONF_PREFIX}.
 */
public class FlinkBlueGreenDeploymentConfigOptions {

    /** Prefix prepended to every Blue/Green option key. */
    public static final String BLUE_GREEN_CONF_PREFIX = "bluegreen.";

    /**
     * Two minutes, expressed in milliseconds.
     *
     * <p>NOTE(review): presumably the lower bound applied to {@link #ABORT_GRACE_PERIOD_MS}; the
     * enforcement is not part of this class, confirm against the controller.
     */
    public static final int MIN_ABORT_GRACE_PERIOD_MS = 120_000;

    /**
     * Starts the definition of an option whose key is {@link #BLUE_GREEN_CONF_PREFIX} followed by
     * the given suffix.
     *
     * @param key option key without the prefix
     * @return builder for the prefixed option key
     */
    public static ConfigOptions.OptionBuilder operatorConfig(String key) {
        final String prefixedKey = BLUE_GREEN_CONF_PREFIX + key;
        return ConfigOptions.key(prefixedKey);
    }

    /**
     * Maximum wait, in milliseconds, for a deployment to become ready before it is aborted.
     *
     * <p>NOTE(review): the default is 0 while the description states a 2 minute minimum; verify
     * how the consumer reconciles the two.
     */
    public static final ConfigOption<Integer> ABORT_GRACE_PERIOD_MS =
            operatorConfig("abortGracePeriodMs")
                    .intType()
                    .defaultValue(0)
                    .withDescription(
                            "The max time to wait for a deployment to become ready before aborting it, in milliseconds. Cannot be smaller than 2 minutes.");

    /** Delay, in milliseconds, applied when a reconciliation is rescheduled; 15 s by default. */
    public static final ConfigOption<Integer> RECONCILIATION_RESCHEDULING_INTERVAL_MS =
            operatorConfig("reconciliationReschedulingIntervalMs")
                    .intType()
                    .defaultValue(15_000)
                    .withDescription(
                            "Configurable delay in milliseconds to use when the operator reschedules a reconciliation.");

    /**
     * Delay before a deployment that was marked done is deleted; 0 by default.
     *
     * <p>NOTE(review): the key suffix suggests milliseconds, but the description does not state
     * the unit.
     */
    public static final ConfigOption<Integer> DEPLOYMENT_DELETION_DELAY_MS =
            operatorConfig("deploymentDeletionDelayMs")
                    .intType()
                    .defaultValue(0)
                    .withDescription(
                            "Configurable delay before deleting a deployment after being marked done.");
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.kubernetes.operator.api.spec;

import org.apache.flink.annotation.Experimental;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Spec that describes a Flink application with blue/green deployment capabilities.
 *
 * <p>Unknown JSON properties are ignored on deserialization. Accessors, equality and constructors
 * are generated by Lombok.
 */
@Experimental
@Data
@NoArgsConstructor
@AllArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public class FlinkBlueGreenDeploymentSpec {

/** Template describing the Flink application managed by the blue/green controller. */
private FlinkDeploymentTemplateSpec template;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.kubernetes.operator.api.spec;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.fabric8.kubernetes.api.model.ObjectMeta;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;

import java.util.LinkedHashMap;
import java.util.Map;

/** Template Spec that describes a Flink application managed by the blue/green controller. */
@AllArgsConstructor
@NoArgsConstructor
@Data
@SuperBuilder
public class FlinkDeploymentTemplateSpec {

@JsonProperty("metadata")
private ObjectMeta metadata;

@JsonProperty("configuration")
private Map<String, String> configuration;

@JsonProperty("spec")
private FlinkDeploymentSpec spec;

@JsonIgnore
private Map<String, Object> additionalProperties = new LinkedHashMap<String, Object>();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.kubernetes.operator.api.status;

/** Enumeration of the possible states of the blue/green transition. */
public enum FlinkBlueGreenDeploymentState {

    /**
     * We use this state while initializing for the first time, always with a "Blue" deployment
     * type.
     */
    INITIALIZING_BLUE,

    /** Identifies the system is running normally with a "Blue" deployment type. */
    ACTIVE_BLUE,

    /** Identifies the system is running normally with a "Green" deployment type. */
    ACTIVE_GREEN,

    /** Identifies the system is transitioning from "Green" to "Blue". */
    TRANSITIONING_TO_BLUE,

    /** Identifies the system is transitioning from "Blue" to "Green". */
    TRANSITIONING_TO_GREEN

    // NOTE(review): open question raised in code review - what state are we in during shutdown?
    // None of the constants above models it.
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.kubernetes.operator.api.status;

import org.apache.flink.annotation.Experimental;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.ToString;
import lombok.experimental.SuperBuilder;

/** Last observed status of the Flink Blue/Green deployment. */
@Experimental
@Data
@AllArgsConstructor
@NoArgsConstructor
@ToString(callSuper = true)
@SuperBuilder
@JsonIgnoreProperties(ignoreUnknown = true)
public class FlinkBlueGreenDeploymentStatus {

private JobStatus jobStatus = new JobStatus();

/** The state of the blue/green transition. */
private FlinkBlueGreenDeploymentState blueGreenState;

/** Last reconciled (serialized) deployment spec. */
private String lastReconciledSpec;

/** Timestamp of last reconciliation. */
private String lastReconciledTimestamp;

/** Computed from abortGracePeriodMs, timestamp after which the deployment should be aborted. */
private String abortTimestamp;

/** Timestamp when the deployment became READY/STABLE. Used to determine when to delete it. */
private String deploymentReadyTimestamp;
}
Loading