From c03cce0cfddff273168ee03c6ff2bbbc6b72468d Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Fri, 17 Apr 2026 13:27:43 +0800 Subject: [PATCH 01/16] Initial Java-SDK --- java-sdk/.editorconfig | 13 + java-sdk/.gitattributes | 11 + java-sdk/.gitignore | 56 +++ java-sdk/README.md | 74 ++++ java-sdk/build.gradle.kts | 33 ++ java-sdk/dags/stub_dag.py | 51 +++ java-sdk/example/build.gradle.kts | 55 +++ .../apache/airflow/example/JavaExample.java | 69 +++ java-sdk/gradle.properties | 23 + java-sdk/gradle/libs.versions.toml | 19 + java-sdk/gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 45633 bytes .../gradle/wrapper/gradle-wrapper.properties | 23 + java-sdk/gradlew | 248 +++++++++++ java-sdk/gradlew.bat | 93 +++++ java-sdk/sdk/build.gradle.kts | 125 ++++++ .../kotlin/org/apache/airflow/sdk/Bundle.kt | 18 + .../org/apache/airflow/sdk/BundleInspector.kt | 40 ++ .../org/apache/airflow/sdk/BundleScanner.kt | 117 ++++++ .../kotlin/org/apache/airflow/sdk/Client.kt | 59 +++ .../kotlin/org/apache/airflow/sdk/Config.kt | 136 ++++++ .../org/apache/airflow/sdk/Connection.kt | 12 + .../main/kotlin/org/apache/airflow/sdk/Dag.kt | 86 ++++ .../org/apache/airflow/sdk/DagBundle.kt | 12 + .../kotlin/org/apache/airflow/sdk/Server.kt | 79 ++++ .../kotlin/org/apache/airflow/sdk/Task.kt | 8 + .../apache/airflow/sdk/execution/Client.kt | 143 +++++++ .../org/apache/airflow/sdk/execution/Comms.kt | 240 +++++++++++ .../apache/airflow/sdk/execution/DagParser.kt | 17 + .../apache/airflow/sdk/execution/JarUtils.kt | 25 ++ .../apache/airflow/sdk/execution/Logger.kt | 108 +++++ .../apache/airflow/sdk/execution/MsgPack.kt | 125 ++++++ .../org/apache/airflow/sdk/execution/Serde.kt | 275 ++++++++++++ .../airflow/sdk/execution/Supervisor.kt | 394 ++++++++++++++++++ .../airflow/sdk/execution/TaskRunner.kt | 28 ++ .../airflow/sdk/execution/TaskSdkFrames.kt | 235 +++++++++++ .../apache/airflow/sdk/BundleScannerTest.kt | 197 +++++++++ .../org/apache/airflow/sdk/BundleTest.kt | 28 ++ 
.../org/apache/airflow/sdk/ConfigTest.kt | 138 ++++++ .../apache/airflow/sdk/CoordinatorCommTest.kt | 113 +++++ .../apache/airflow/sdk/execution/CommsTest.kt | 86 ++++ .../airflow/sdk/execution/DagParserTest.kt | 28 ++ .../SerializationCompatibilityTest.kt | 151 +++++++ .../airflow/sdk/execution/TaskRunnerTest.kt | 110 +++++ java-sdk/settings.gradle.kts | 13 + java-sdk/validation/serialization/compare.py | 171 ++++++++ .../serialization/serialize_python.py | 125 ++++++ .../validation/serialization/test_dags.yaml | 198 +++++++++ 47 files changed, 4408 insertions(+) create mode 100644 java-sdk/.editorconfig create mode 100644 java-sdk/.gitattributes create mode 100644 java-sdk/.gitignore create mode 100644 java-sdk/README.md create mode 100644 java-sdk/build.gradle.kts create mode 100644 java-sdk/dags/stub_dag.py create mode 100644 java-sdk/example/build.gradle.kts create mode 100644 java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java create mode 100644 java-sdk/gradle.properties create mode 100644 java-sdk/gradle/libs.versions.toml create mode 100644 java-sdk/gradle/wrapper/gradle-wrapper.jar create mode 100644 java-sdk/gradle/wrapper/gradle-wrapper.properties create mode 100755 java-sdk/gradlew create mode 100644 java-sdk/gradlew.bat create mode 100644 java-sdk/sdk/build.gradle.kts create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt create 
mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt create mode 100644 java-sdk/settings.gradle.kts create mode 100644 java-sdk/validation/serialization/compare.py create mode 100644 
java-sdk/validation/serialization/serialize_python.py create mode 100644 java-sdk/validation/serialization/test_dags.yaml diff --git a/java-sdk/.editorconfig b/java-sdk/.editorconfig new file mode 100644 index 0000000000000..37bdc0ac6ea59 --- /dev/null +++ b/java-sdk/.editorconfig @@ -0,0 +1,13 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +indent_style = space +indent_size = 4 + +[*.java] +indent_size = 2 + +[*.kt] +indent_size = 2 diff --git a/java-sdk/.gitattributes b/java-sdk/.gitattributes new file mode 100644 index 0000000000000..a87d264c425cf --- /dev/null +++ b/java-sdk/.gitattributes @@ -0,0 +1,11 @@ +# +# https://help.github.com/articles/dealing-with-line-endings/ +# +# Linux start script should use lf +/gradlew text eol=lf + +# These are Windows script files and should use crlf +*.bat text eol=crlf + +# Binary files should be left untouched +*.jar binary diff --git a/java-sdk/.gitignore b/java-sdk/.gitignore new file mode 100644 index 0000000000000..bf1f44332ebc8 --- /dev/null +++ b/java-sdk/.gitignore @@ -0,0 +1,56 @@ +.gradle +build/ +!gradle/wrapper/gradle-wrapper.jar +!**/src/main/**/build/ +!**/src/test/**/build/ +.kotlin + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr +out/ +!**/src/main/**/out/ +!**/src/test/**/out/ + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ +!**/src/main/**/bin/ +!**/src/test/**/bin/ + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store +# Ignore Gradle build output directory +build + +### Artifacts of airflow standalone command ### +airflow.cfg +airflow.db +simple_auth_manager_passwords.json.generated +logs/dag_id=* +logs/dag_processor + +### Compatibility Test Results ### +validation/serialization/serialized_java.json +validation/serialization/serialized_python.json 
diff --git a/java-sdk/README.md b/java-sdk/README.md new file mode 100644 index 0000000000000..92ef9ebdc5dd8 --- /dev/null +++ b/java-sdk/README.md @@ -0,0 +1,74 @@ + + +# Airflow Java SDK + +A **JVM** SDK for Apache Airflow. You can use any JVM-compatible language to write +workflow bundles, and have Airflow consume the result. + +The SDK and execution-time logic is implemented in Kotlin. +An example is bundled showing how the SDK can be used in Java. + +## Building + +```bash +./gradlew build +``` + +## Technical Details + +The Java program is launched as a subprocess by the Airflow worker and communicates +through TCP sockets. The Java program accepts flags `--comm` and `--logs` from the +command line. + +The Java program "parses" DAGs on launch, and then connects to the specified TCP servers. +The rest is similar to the standard Airflow: + +* DAG-parsing: + 1. On connection, the parent immediately sends a DagParsingRequest through the socket. + 2. The Java program sends back a DagParsingResult to the parent. + 3. The Java program exits. +* Execution: + 1. On connection, the parent immediately sends a StartupDetails through the socket. + 2. The Java program uses the information to find the relevant task to execute. + 3. The task is run. + 4. The Java program tells the parent to update the task's terminal state. + 5. The Java program exits. + +Communication uses the same formats as the Python-based processes. + +## Serialization Validation + +Workflow: + +```bash +# 1. Generate Java output (runs as part of normal test suite) +# More specifically, the test `sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt` generates the output file `validation/serialization/serialized_java.json`. +./gradlew sdk:test + +# 2. Generate Python output (requires Airflow env) +uv run validation/serialization/serialize_python.py \ + validation/serialization/test_dags.yaml \ + validation/serialization/serialized_python.json + +# 3. 
Compare +uv run validation/serialization/compare.py \ + validation/serialization/serialized_python.json \ + validation/serialization/serialized_java.json +``` diff --git a/java-sdk/build.gradle.kts b/java-sdk/build.gradle.kts new file mode 100644 index 0000000000000..a9fb8d993533b --- /dev/null +++ b/java-sdk/build.gradle.kts @@ -0,0 +1,33 @@ +import com.diffplug.gradle.spotless.SpotlessExtension +import org.jetbrains.kotlin.gradle.dsl.JvmTarget + +plugins { + kotlin("jvm") version "2.3.0" + id("com.diffplug.spotless") version "7.2.1" // Last version supporting JDK 11. + id("org.jlleitschuh.gradle.ktlint") version "14.0.1" +} + +allprojects { + apply(plugin = "com.diffplug.spotless") + apply(plugin = "org.jetbrains.kotlin.jvm") + apply(plugin = "org.jlleitschuh.gradle.ktlint") + + repositories { mavenCentral() } + + java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(11)) + } + sourceCompatibility = JavaVersion.VERSION_11 + } + kotlin { compilerOptions { jvmTarget = JvmTarget.JVM_11 } } + + configure<SpotlessExtension> { + java { + target("**/*.java") + googleJavaFormat().formatJavadoc(false) + trimTrailingWhitespace() + endWithNewline() + } + } +} diff --git a/java-sdk/dags/stub_dag.py b/java-sdk/dags/stub_dag.py new file mode 100644 index 0000000000000..65a0832e6a8d3 --- /dev/null +++ b/java-sdk/dags/stub_dag.py @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from airflow.sdk import dag, task + + +@task() +def python_task_1(ti): + print("python_task_1") + print("Push Python Task 'python_task_1' XCom:") + ti.xcom_push(value="value-pushed-from-python_task_1", key="return_value") + + +@task.stub(language="java") +def extract(): ... + + +@task.stub(language="java") +def transform(): ... + + +@task() +def python_task_2(ti): + print("python_task_2") + print("Pull Java Task 'transform' XCom:") + print(ti.xcom_pull(task_ids="transform")) + + +@dag(dag_id="java_example") +def simple_dag(): + + python_task_1() >> extract() >> transform() >> python_task_2() + + +simple_dag() diff --git a/java-sdk/example/build.gradle.kts b/java-sdk/example/build.gradle.kts new file mode 100644 index 0000000000000..e36ff5bd68199 --- /dev/null +++ b/java-sdk/example/build.gradle.kts @@ -0,0 +1,55 @@ +plugins { + application +} + +dependencies { + implementation(project(":sdk")) + implementation("org.slf4j:slf4j-simple:2.0.17") +} + +sourceSets { + main { + java.srcDir("src/java") + } +} + +application { + mainClass = "org.apache.airflow.example.JavaExample" +} + +val bundleMainClass = application.mainClass.get() +val metadataFileName = "airflow-metadata.yaml" +val metadataOutputDir = layout.buildDirectory.dir("airflow-metadata") +val dagCodeSourcePath = bundleMainClass.replace('.', '/') + ".java" +val dagCodeFileName = bundleMainClass.substringAfterLast('.') + ".java" + +val inspectBundle = + tasks.register<JavaExec>("inspectBundle") { + dependsOn("classes") + classpath = sourceSets.main.get().runtimeClasspath 
+ mainClass.set("org.apache.airflow.sdk.BundleInspector") + args = + listOf( + bundleMainClass, + metadataOutputDir + .get() + .file(metadataFileName) + .asFile.absolutePath, + ) + } + +tasks.withType<Jar> { + dependsOn(inspectBundle) + from(metadataOutputDir) + from("src/java/$dagCodeSourcePath") + manifest { + attributes( + "Main-Class" to bundleMainClass, + "Airflow-Java-SDK-Version" to project.version, + "Airflow-Java-SDK-Metadata" to metadataFileName, + "Airflow-Java-SDK-Dag-Code" to dagCodeFileName, + "Implementation-Title" to "Example Java bundle", + "Implementation-Version" to "1", + ) + } +} diff --git a/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java b/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java new file mode 100644 index 0000000000000..1a681dd2c9391 --- /dev/null +++ b/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java @@ -0,0 +1,69 @@ +package org.apache.airflow.example; + +import java.util.Date; +import java.util.List; +import org.apache.airflow.sdk.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class JavaExample implements DagBundle { + private static final Logger logger = LoggerFactory.getLogger(JavaExample.class); + + public static class Extract implements Task { + public void execute(Client client) throws Exception { + logger.info("Hello from task"); + + var python_xcom = client.getXCom("python_task_1"); + logger.info("Got XCom from Python Task 'python_task_1' {}", python_xcom); + + var connection = client.getConnection("test_http"); + logger.info("Got con {}", connection); + + for (var i = 0; i < 3; i++) { + logger.info("Beep {}, next time will be {}", i, new Date()); + Thread.sleep(2 * 1000); + } + + client.setXCom(new Date().getTime()); + logger.info("Goodbye from task"); + } + } + + public static class Transform implements Task { + public void execute(Client client) { + var extract_xcom = client.getXCom("extract"); + logger.info("Got XCom from 'extract' {}", 
extract_xcom); + + var variable = client.getVariable("my_variable"); + logger.info("Got variable {}", variable); + + logger.info("Push XCom to python task 2"); + client.setXCom(new Date().getTime()); + } + } + + public static class Load implements Task { + public void execute(Client client) { + var xcom = client.getXCom("transform"); + logger.info("Got XCom from 'transform' {}", xcom); + throw new RuntimeException("I failed"); + } + } + + @Override + public List<Dag> getDags() { + var javaExample = new Dag("java_example", /* description= */ null, /* schedule= */ "@daily"); + javaExample.addTask("extract", Extract.class, List.of()); + javaExample.addTask("transform", Transform.class, List.of("extract")); + javaExample.addTask("load", Load.class, List.of("transform")); + return List.of(javaExample); + } + + public static void main(String[] args) { + var example = new JavaExample(); + var bundle = + new Bundle(JavaExample.class.getPackage().getImplementationVersion(), example.getDags()); + + Server.create(args).serve(bundle); + } +} diff --git a/java-sdk/gradle.properties b/java-sdk/gradle.properties new file mode 100644 index 0000000000000..7ec9aa8974afa --- /dev/null +++ b/java-sdk/gradle.properties @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file was generated by the Gradle 'init' task. +# https://docs.gradle.org/current/userguide/build_environment.html#sec:gradle_configuration_properties + +org.gradle.configuration-cache=true + +airflowExecApiVersion=2025-11-05 diff --git a/java-sdk/gradle/libs.versions.toml b/java-sdk/gradle/libs.versions.toml new file mode 100644 index 0000000000000..a0ac505d527fe --- /dev/null +++ b/java-sdk/gradle/libs.versions.toml @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file was generated by the Gradle 'init' task. 
+# https://docs.gradle.org/current/userguide/platforms.html#sub::toml-dependencies-format diff --git a/java-sdk/gradle/wrapper/gradle-wrapper.jar b/java-sdk/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000000000000000000000000000000..f8e1ee3125fe0768e9a76ee977ac089eb657005e GIT binary patch literal 45633 zcma&NV|1n6wyqu9PQ|uu+csuwn-$x(T~Woh?Nr6KUD3(A)@l1Yd+oj6Z_U=8`RAE` z#vE6_`?!1WLs1443=Ieh3JM4ai0JG2|2{}S&_HrxszP*9^5P7#QX*pVDq?D?;6T8C z{bWO1$9at%!*8ax*TT&F99vwf1Ls+3lklsb|bC`H`~Q z_w}*E9P=Wq;PYlGYhZ^lt#N97bt5aZ#mQcOr~h^B;R>f-b0gf{y(;VA{noAt`RZzU z7vQWD{%|q!urW2j0Z&%ChtL(^9m` zgaU%|B;V#N_?%iPvu0PVkX=1m9=*SEGt-Lp#&Jh%rz6EJXlV^O5B5YfM5j{PCeElx z8sipzw8d=wVhFK+@mgrWyA)Sv3BJq=+q+cL@=wuH$2;LjY z^{&+X4*HFA0{QvlM_V4PTQjIdd;d|2YuN;s|bi!@<)r-G%TuOCHz$O(_-K z)5in&6uNN<0UfwY=K>d;cL{{WK2FR|NihJMN0Q4X+(1lE)$kY?T$7UWleIU`i zQG#X-&&m-8x^(;n@o}$@vPMYRoq~|FqC~CU3MnoiifD{(CwAGd%X#kFHq#4~%_a!{ zeX{XXDT#(DvX7NtAs7S}2ZuiZ>gtd;tCR7E)3{J^`~#Vd**9qz%~JRFAiZf{zt|Dr zvQw!)n7fNUn_gH`o9?8W8t_%x6~=y*`r46bjj(t{YU*qfqd}J}*mkgUfsXTI>Uxl6 z)Fj>#RMy{`wINIR;{_-!xGLgVaTfNJ2-)%YUfO&X5z&3^E#4?k-_|Yv$`fpgYkvnA%E{CiV zP|-zAf8+1@R`sT{rSE#)-nuU7Pwr-z>0_+CLQT|3vc-R22ExKT4ym@Gj77j$aTVns zp4Kri#Ml?t7*n(;>nkxKdhOU9Qbwz%*#i9_%K<`m4T{3aPbQ?J(Mo`6E5cDdbAk%X z+4bN%E#a(&ZXe{G#V!2Nt+^L$msKVHP z|APpBhq7knz(O2yY)$$VyI_Xg4UIC*$!i7qQG~KEZnO@Q1i89@4ZKW*3^Wh?o?zSkfPxdhnTxlO!3tAqe_ zuEqHVcAk3uQIFTpP~C{d$?>7yt3G3Fo>syXTus>o0tJdFpQWC27hDiwC%O09i|xCq z@H6l|+maB;%CYQIChyhu;PVYz9e&5a@EEQs3$DS6dLIS+;N@I0)V}%B`jdYv;JDck zd|xxp(I?aedivE7*19hesoa-@Xm$^EHbbVmh$2^W-&aTejsyc$i+}A#n2W*&0Qt`5 zJS!2A|LVV;L!(*x2N)GjJC;b1RB_f(#D&g_-};a*|BTRvfdIX}Gau<;uCylMNC;UG zzL((>6KQBQ01wr%7u9qI2HLEDY!>XisIKb#6=F?pAz)!_JX}w|>1V>X^QkMdFi@Jr z`1N*V4xUl{qvECHoF?#lXuO#Dg2#gh|AU$Wc=nuIbmVPBEGd(R#&Z`TP9*o%?%#ob zWN%ByU+55yBNfjMjkJnBjT!cVDi}+PR3N&H(f8$d^Pu;A_WV*{)c2Q{IiE7&LPsd4 z!rvkUf{sco_WNSIdW+btM#O+4n`JiceH6%`7pDV zRqJ@lj=Dt(e-Gkz$b!c2>b)H$lf(fuAPdIsLSe(dZ4E~9+Ge!{3j~>nS%r)eQZ;Iq 
ztWGpp=2Ptc!LK_TQ8cgJXUlU5mRu|7F2{eu*;a>_5S<;bus=t*IXcfzJRPv4xIs;s zt2<&}OM>KxkTxa=dFMfNr42=DL~I}6+_{`HT_YJBiWkpVZND1Diad~Yr*Fuq{zljr z*_+jXk=qVBdwlQkYuIrB4GG*#voba$?h*u0uRNL+87-?AjzG2X_R9mzQ7BJEawutObr|ey~%in>6k%A`K*`pb-|DF5m})!`b=~osoiW2)IFh?_y9y<3Cix_ znvC=bjBX1J820!%%9FaB@v?hAsd05e@w$^ZAvtUp*=Bi+Owkl?rLa6F#yl{s+?563 zmn2 zV95%gySAJ$L!Vvk4kx!n@mo`3Mfi`2lXUkBmd%)u)7C?Pa;oK~zUQ#p0u{a|&0;zNO#9a4`v^3df90X#~l_k$q7n&L5 z?TszF842~g+}tgUP}UG?ObLCE1(Js_$e>XS7m%o7j@@VdxePtg)w{i5an+xK95r?s zDeEhgMO-2$H?@0{p-!4NJ)}zP+3LzZB?FVap)ObHV6wp}Lrxvz$cjBND1T6ln$EfJ zZRPeR2lP}K0p8x`ahxB??Ud;i7$Y5X!5}qBFS+Zp=P^#)08nQi_HuJcN$0=x;2s53 zwoH}He9BlKT4GdWfWt)@o@$4zN$B@5gVIN~aHtwIhh{O$uHiMgYl=&Vd$w#B2 zRv+xK3>4E{!)+LXA2#*K6H~HpovXAQeXV(^Pd%G_>ro0(4_@`{2Ag(+8{9pqJ>Co$ zRRV(oX;nD+Jel_2^BlNO=cQP8q*G#~R3PTERUxvug_C4T3qwb9MQE|^{5(H*nt`fn z^%*p-RwkAhT6(r>E@5w8FaB)Q<{#`H9fTdc6QBuSr9D-x!Tb9f?wI=M{^$cB5@1;0 z+yLHh?3^c-Qte@JI<SW`$bs5Vv9!yWjJD%oY z8Cdc$a(LLy@tB2)+rUCt&0$&+;&?f~W6+3Xk3g zy9L�|d9Zj^A1Dgv5yzCONAB>8LM`TRL&7v_NKg(bEl#y&Z$py}mu<4DrT@8HHjE zqD@4|aM>vt!Yvc2;9Y#V;KJ8M>vPjiS2ycq52qkxInUK*QqA3$&OJ`jZBo zpzw&PT%w0$D94KD%}VN9c)eCueh1^)utGt2OQ+DP(BXszodfc1kFPWl~BQ5Psy*d`UIf zc}zQ8TVw35jdCSc78)MljC-g3$GX2$<0<3MEQXS&i<(ZFClz9WlL}}?%u>S2hhEk_ zyzfm&@Q%YVB-vw3KH|lU#c_)0aeG^;aDG&!bwfOz_9)6gLe;et;h(?*0d-RV0V)1l zzliq#`b9Y*c`0!*6;*mU@&EFSbW>9>L5xUX+unp%@tCW#kLfz)%3vwN{1<-R*g+B_C^W8)>?n%G z<#+`!wU$L&dn)Pz(9DGGI%RlmM2RpeDy9)31OZV$c2T>-Jl&4$6nul&e7){1u-{nP zE$uZs%gyanu+yBcAb+jTYGy(^<;&EzeLeqveN12Lvv)FQFn0o&*qAaH+gLJ)*xT9y z>`Y`W?M#K7%w26w?Oen>j7=R}EbZ;+jcowV&i}P|IfW^C5GJHt5D;Q~)|=gW3iQ;N zQGl4SQFtz=&~BGon6hO@mRnjpmM79ye^LY_L2no{f_M?j80pr`o3BrI7ice#8#Zt4 zO45G97Hpef+AUEU%jN-dLmPYHY(|t#D)9|IeB^i1X|eEq+ymld_Uj$l^zVAPRilx- z^II$sL4G~{^7?sik2BK7;ZV-VIVhrKjUxBIsf^N&K`)5;PjVg-DTm1Xtw4-tGtElU zJgVTCk4^N4#-kPuX=7p~GMf5Jj5A#>)GX)FIcOqY4lf}Vv2gjrOTuFusB@ERW-&fb zTp=E0E?gXkwzn)AMMY*QCftp%MOL-cbsG{02$0~b?-JD{-nwj58 zBHO1YL~yn~RpnZ6*;XA|MSJeBfX-D?afH*E!2uGjT%k!jtx~OG_jJ`Ln}lMQb7W41 
zmTIRd%o$pu;%2}}@2J$x%fg{DZEa-Wxdu6mRP~Ea0zD2+g;Dl*to|%sO-5mUrZ`~C zjJ zUe^**YRgBvlxl<(r0LjxjSQKiTx+E<7$@9VO=RYgL9ldTyKzfqR;Y&gu^ub!fVX7u z3H@;8j#tVgga~EMuXv_#Q8<*uK@R{mGzn92eDYkF1sbxh5!P|M-D)T~Ae*SO`@u$Q z7=5s)HM)w~s2j5{I67cqSn6BLLhCMcn0=OTVE?T7bAmY!T+xZ_N3op~wZ3Oxlm6(a5qB({6KghlvBd9HJ#V6YY_zxbj-zI`%FN|C*Q`DiV z#>?Kk7VbuoE*I9tJaa+}=i7tJnMRn`P+(08 za*0VeuAz!eI7giYTsd26P|d^E2p1f#oF*t{#klPhgaShQ1*J7?#CTD@iDRQIV+Z$@ z>qE^3tR3~MVu=%U%*W(1(waaFG_1i5WE}mvAax;iwZKv^g1g}qXY7lAd;!QQa#5e= z1_8KLHje1@?^|6Wb(A{HQ_krJJP1GgE*|?H0Q$5yPBQJlGi;&Lt<3Qc+W4c}Ih~@* zj8lYvme}hwf@Js%Oj=4BxXm15E}7zS0(dW`7X0|$damJ|gJ6~&qKL>gB_eC7%1&Uh zLtOkf7N0b;B`Qj^9)Bfh-( z0or96!;EwEMnxwp!CphwxxJ+DDdP4y3F0i`zZp-sQ5wxGIHIsZCCQz5>QRetx8gq{ zA33BxQ}8Lpe!_o?^u2s3b!a-$DF$OoL=|9aNa7La{$zI#JTu_tYG{m2ly$k?>Yc); zTA9ckzd+ibu>SE6Rc=Yd&?GA9S5oaQgT~ER-|EwANJIAY74|6 z($#j^GP}EJqi%)^jURCj&i;Zl^-M9{=WE69<*p-cmBIz-400wEewWVEd^21}_@A#^ z2DQMldk_N)6bhFZeo8dDTWD@-IVunEY*nYRON_FYII-1Q@@hzzFe(lTvqm}InfjQ2 zN>>_rUG0Lhaz`s;GRPklV?0 z;~t4S8M)ZBW-ED?#UNbCrsWb=??P># zVc}MW_f80ygG_o~SW+Q6oeIUdFqV2Fzys*7+vxr^ZDeXcZZc;{kqK;(kR-DKL zByDdPnUQgnX^>x?1Tz~^wZ%Flu}ma$Xmgtc7pSmBIH%&H*Tnm=L-{GzCv^UBIrTH5 zaoPO|&G@SB{-N8Xq<+RVaM_{lHo@X-q}`zjeayVZ9)5&u*Y>1!$(wh9Qoe>yWbPgw zt#=gnjCaT_+$}w^*=pgiHD8N$hzqEuY5iVL_!Diw#>NP7mEd?1I@Io+?=$?7cU=yK zdDKk_(h_dB9A?NX+&=%k8g+?-f&`vhAR}&#zP+iG%;s}kq1~c{ac1@tfK4jP65Z&O zXj8Ew>l7c|PMp!cT|&;o+(3+)-|SK&0EVU-0-c&guW?6F$S`=hcKi zpx{Z)UJcyihmN;^E?*;fxjE3kLN4|&X?H&$md+Ege&9en#nUe=m>ep3VW#C?0V=aS zLhL6v)|%$G5AO4x?Jxy8e+?*)YR~<|-qrKO7k7`jlxpl6l5H&!C4sePiVjAT#)b#h zEwhfkpFN9eY%EAqg-h&%N>E0#%`InXY?sHyptcct{roG42Mli5l)sWt66D_nG2ed@ z#4>jF?sor7ME^`pDlPyQ(|?KL9Q88;+$C&3h*UV*B+*g$L<{yT9NG>;C^ZmPbVe(a z09K^qVO2agL`Hy{ISUJ{khPKh@5-)UG|S8Sg%xbJMF)wawbgll3bxk#^WRqmdY7qv zr_bqa3{`}CCbREypKd!>oIh^IUj4yl1I55=^}2mZAAW6z}Kpt3_o1b4__sQ;b zv)1=xHO?gE-1FL}Y$0YdD-N!US;VSH>UXnyKoAS??;T%tya@-u zfFo)@YA&Q#Q^?Mtam19`(PS*DL{PHjEZa(~LV7DNt5yoo1(;KT)?C7%^Mg;F!C)q= z6$>`--hQX4r?!aPEXn;L*bykF1r8JVDZ)x4aykACQy(5~POL;InZPU&s5aZm-w1L< 
z`crCS5=x>k_88n(*?zn=^w*;0+8>ui2i>t*Kr!4?aA1`yj*GXi#>$h8@#P{S)%8+N zCBeL6%!Ob1YJs5+a*yh{vZ8jH>5qpZhz_>(ph}ozKy9d#>gba1x3}`-s_zi+SqIeR z0NCd7B_Z|Fl+(r$W~l@xbeAPl5{uJ{`chq}Q;y8oUN0sUr4g@1XLZQ31z9h(fE_y( z_iQ(KB39LWd;qwPIzkvNNkL(P(6{Iu{)!#HvBlsbm`g2qy&cTsOsAbwMYOEw8!+75D!>V{9SZ?IP@pR9sFG{T#R*6ez2&BmP8*m^6+H2_ z>%9pg(+R^)*(S21iHjLmdt$fmq6y!B9L!%+;wL5WHc^MZRNjpL9EqbBMaMns2F(@h zN0BEqZ3EWGLjvY&I!8@-WV-o@>biD;nx;D}8DPapQF5ivpHVim8$G%3JrHtvN~U&) zb1;=o*lGfPq#=9Moe$H_UhQPBjzHuYw;&e!iD^U2veY8)!QX_E(X@3hAlPBIc}HoD z*NH1vvCi5xy@NS41F1Q3=Jkfu&G{Syin^RWwWX|JqUIX_`}l;_UIsj&(AFQ)ST*5$ z{G&KmdZcO;jGIoI^+9dsg{#=v5eRuPO41<*Ym!>=zHAXH#=LdeROU-nzj_@T4xr4M zJI+d{Pp_{r=IPWj&?%wfdyo`DG1~|=ef?>=DR@|vTuc)w{LHqNKVz9`Dc{iCOH;@H5T{ zc<$O&s%k_AhP^gCUT=uzrzlEHI3q`Z3em0*qOrPHpfl1v=8Xkp{!f9d2p!4 zL40+eJB4@5IT=JTTawIA=Z%3AFvv=l1A~JX>r6YUMV7GGLTSaIn-PUw| z;9L`a<)`D@Qs(@P(TlafW&-87mcZuwFxo~bpa01_M9;$>;4QYkMQlFPgmWv!eU8Ut zrV2<(`u-@1BTMc$oA*fX;OvklC1T$vQlZWS@&Wl}d!72MiXjOXxmiL8oq;sP{)oBe zS#i5knjf`OfBl}6l;BSHeY31w8c~8G>$sJ9?^^!)Z*Z*Xg zbTbkcbBpgFui(*n32hX~sC7gz{L?nlnOjJBd@ zUC4gd`o&YB4}!T9JGTe9tqo0M!JnEw4KH7WbrmTRsw^Nf z^>RxG?2A33VG3>E?iN|`G6jgr`wCzKo(#+zlOIzp-^E0W0%^a>zO)&f(Gc93WgnJ2p-%H-xhe{MqmO z8Iacz=Qvx$ML>Lhz$O;3wB(UI{yTk1LJHf+KDL2JPQ6#m%^bo>+kTj4-zQ~*YhcqS z2mOX!N!Q$d+KA^P0`EEA^%>c12X(QI-Z}-;2Rr-0CdCUOZ=7QqaxjZPvR%{pzd21HtcUSU>u1nw?)ZCy+ zAaYQGz59lqhNXR4GYONpUwBU+V&<{z+xA}`Q$fajmR86j$@`MeH}@zz*ZFeBV9Ot< ze8BLzuIIDxM&8=dS!1-hxiAB-x-cVmtpN}JcP^`LE#2r9ti-k8>Jnk{?@Gw>-WhL=v+H!*tv*mcNvtwo)-XpMnV#X>U1F z?HM?tn^zY$6#|(|S~|P!BPp6mur58i)tY=Z-9(pM&QIHq+I5?=itn>u1FkXiehCRC zW_3|MNOU)$-zrjKnU~{^@i9V^OvOJMp@(|iNnQ%|iojG2_Snnt`1Cqx2t)`vW&w2l zwb#`XLNY@FsnC-~O&9|#Lpvw7n!$wL9azSk)$O}?ygN@FEY({2%bTl)@F2wevCv`; zZb{`)uMENiwE|mti*q5U4;4puX{VWFJ#QIaa*%IHKyrU*HtjW_=@!3SlL~pqLRs?L zoqi&}JLsaP)yEH!=_)zmV-^xy!*MCtc{n|d%O zRM>N>eMG*Qi_XAxg@82*#zPe+!!f#;xBxS#6T-$ziegN-`dLm z=tTN|xpfCPng06|X^6_1JgN}dM<_;WsuL9lu#zLVt!0{%%D9*$nT2E>5@F(>Fxi%Y 
zpLHE%4LZSJ1=_qm0;^Wi%x56}k3h2Atro;!Ey}#g&*BpbNXXS}v>|nn=Mi0O(5?=1V7y1^1Bdt5h3}oL@VsG>NAH z1;5?|Sth=0*>dbXSQ%MQKB?eN$LRu?yBy@qQVaUl*f#p+sLy$Jd>*q;(l>brvNUbIF0OCf zk%Q;Zg!#0w0_#l)!t?3iz~`X8A>Yd3!P&A4Ov6&EdZmOixeTd4J`*Wutura(}4w@KV>i#rf(0PYL&v^89QiXBP6sj=N;q8kVxS}hA! z|3QaiYz!w+xQ%9&Zg${JgQ*Ip_bg2rmmG`JkX^}&5gbZF!Z(gDD1s5{QwarPK(li- zW9y-CiQ`5Ug1ceN1w7lCxl=2}7c*8_XH8W7y0AICn19qZ`w}z0iCJ$tJ}NjzQCH90 zc!UzpKvk%3;`XfFi2;F*q2eMQQ5fzO{!`KU1T^J?Z64|2Z}b1b6h80_H%~J)J)kbM0hsj+FV6%@_~$FjK9OG7lY}YA zRzyYxxy18z<+mCBiX?3Q{h{TrNRkHsyF|eGpLo0fKUQ|19Z0BamMNE9sW z?vq)r`Qge{9wN|ezzW=@ojpVQRwp##Q91F|B5c`a0A{HaIcW>AnqQ*0WT$wj^5sWOC1S;Xw7%)n(=%^in zw#N*+9bpt?0)PY$(vnU9SGSwRS&S!rpd`8xbF<1JmD&6fwyzyUqk){#Q9FxL*Z9%#rF$} zf8SsEkE+i91VY8d>Fap#FBacbS{#V&r0|8bQa;)D($^v2R1GdsQ8YUk(_L2;=DEyN%X*3 z;O@fS(pPLRGatI93mApLsX|H9$VL2)o(?EYqlgZMP{8oDYS8)3G#TWE<(LmZ6X{YA zRdvPLLBTatiUG$g@WK9cZzw%s6TT1Chmw#wQF&&opN6^(D`(5p0~ zNG~fjdyRsZv9Y?UCK(&#Q2XLH5G{{$9Y4vgMDutsefKVVPoS__MiT%qQ#_)3UUe=2fK)*36yXbQUp#E98ah(v`E$c3kAce_8a60#pa7rq6ZRtzSx6=I^-~A|D%>Riv{Y`F9n3CUPL>d`MZdRmBzCum2K%}z@Z(b7#K!-$Hb<+R@Rl9J6<~ z4Wo8!!y~j(!4nYsDtxPIaWKp+I*yY(ib`5Pg356Wa7cmM9sG6alwr7WB4IcAS~H3@ zWmYt|TByC?wY7yODHTyXvay9$7#S?gDlC?aS147Ed7zW!&#q$^E^_1sgB7GKfhhYu zOqe*Rojm~)8(;b!gsRgQZ$vl5mN>^LDgWicjGIcK9x4frI?ZR4Z%l1J=Q$0lSd5a9 z@(o?OxC72<>Gun*Y@Z8sq@od{7GGsf8lnBW^kl6sX|j~UA2$>@^~wtceTt^AtqMIx zO6!N}OC#Bh^qdQV+B=9hrwTj>7HvH1hfOQ{^#nf%e+l)*Kgv$|!kL5od^ka#S)BNT z{F(miX_6#U3+3k;KxPyYXE0*0CfL8;hDj!QHM@)sekF9uyBU$DRZkka4ie^-J2N8w z3PK+HEv7kMnJU1Y+>rheEpHdQ3_aTQkM3`0`tC->mpV=VtvU((Cq$^(S^p=+$P|@} zueLA}Us^NTI83TNI-15}vrC7j6s_S`f6T(BH{6Jj{Lt;`C+)d}vwPGx62x7WXOX19 z2mv1;f^p6cG|M`vfxMhHmZxkkmWHRNyu2PDTEpC(iJhH^af+tl7~h?Y(?qNDa`|Ogv{=+T@7?v344o zvge%8Jw?LRgWr7IFf%{-h>9}xlP}Y#GpP_3XM7FeGT?iN;BN-qzy=B# z=r$79U4rd6o4Zdt=$|I3nYy;WwCb^`%oikowOPGRUJ3IzChrX91DUDng5_KvhiEZwXl^y z+E!`Z6>}ijz5kq$nNM8JA|5gf_(J-);?SAn^N-(q2r6w31sQh6vLYp^ z<>+GyGLUe_6eTzX7soWpw{dDbP-*CsyKVw@I|u`kVX&6_h5m!A5&3#=UbYHYJ5GK& 
zLcq@0`%1;8KjwLiup&i&u&rmt*LqALkIqxh-)Exk&(V)gh9@Fn+WU=6-UG^X2~*Q-hnQ$;;+<&lRZ>g0I`~yuv!#84 zy>27(l&zrfDI!2PgzQyV*R(YFd`C`YwR_oNY+;|79t{NNMN1@fp?EaNjuM2DKuG%W z5749Br2aU6K|b=g4(IR39R8_!|B`uQ)bun^C9wR4!8isr$;w$VOtYk+1L9#CiJ#F) z)L}>^6>;X~0q&CO>>ZBo0}|Ex9$p*Hor@Ej9&75b&AGqzpGpM^dx}b~E^pPKau2i5 zr#tT^S+01mMm}z480>-WjU#q`6-gw4BJMWmW?+VXBZ#JPzPW5QQm@RM#+zbQMpr>M zX$huprL(A?yhv8Y81K}pTD|Gxs#z=K(Wfh+?#!I$js5u8+}vykZh~NcoLO?ofpg0! zlV4E9BAY_$pN~e-!VETD&@v%7J~_jdtS}<_U<4aRqEBa&LDpc?V;n72lTM?pIVG+> z*5cxz_iD@3vIL5f9HdHov{o()HQ@6<+c}hfC?LkpBEZ4xzMME^~AdB8?2F=#6ff!F740l&v7FN!n_ zoc1%OfX(q}cg4LDk-1%|iZ^=`x5Vs{oJYhXufP;BgVd*&@a04pSek6OS@*UH`*dAp z7wY#70IO^kSqLhoh9!qIj)8t4W6*`Kxy!j%Bi%(HKRtASZ2%vA0#2fZ=fHe0zDg8^ zucp;9(vmuO;Zq9tlNH)GIiPufZlt?}>i|y|haP!l#dn)rvm8raz5L?wKj9wTG znpl>V@};D!M{P!IE>evm)RAn|n=z-3M9m5J+-gkZHZ{L1Syyw|vHpP%hB!tMT+rv8 zIQ=keS*PTV%R7142=?#WHFnEJsTMGeG*h)nCH)GpaTT@|DGBJ6t>3A)XO)=jKPO<# zhkrgZtDV6oMy?rW$|*NdJYo#5?e|Nj>OAvCXHg~!MC4R;Q!W5xcMwX#+vXhI+{ywS zGP-+ZNr-yZmpm-A`e|Li#ehuWB{{ul8gB&6c98(k59I%mMN9MzK}i2s>Ejv_zVmcMsnobQLkp z)jmsJo2dwCR~lcUZs@-?3D6iNa z2k@iM#mvemMo^D1bu5HYpRfz(3k*pW)~jt8UrU&;(FDI5ZLE7&|ApGRFLZa{yynWx zEOzd$N20h|=+;~w$%yg>je{MZ!E4p4x05dc#<3^#{Fa5G4ZQDWh~%MPeu*hO-6}2*)t-`@rBMoz&gn0^@c)N>z|Ikj8|7Uvdf5@ng296rq2LiM#7KrWq{Jc7;oJ@djxbC1s6^OE>R6cuCItGJ? 
z6AA=5i=$b;RoVo7+GqbqKzFk>QKMOf?`_`!!S!6;PSCI~IkcQ?YGxRh_v86Q%go2) zG=snIC&_n9G^|`+KOc$@QwNE$b7wxBY*;g=K1oJnw8+ZR)ye`1Sn<@P&HZm0wDJV* z=rozX4l;bJROR*PEfHHSmFVY3M#_fw=4b_={0@MP<5k4RCa-ZShp|CIGvW^9$f|BM#Z`=3&=+=p zp%*DC-rEH3N;$A(Z>k_9rDGGj2&WPH|}=Pe3(g}v3=+`$+A=C5PLB3UEGUMk92-erU%0^)5FkU z^Yx#?Gjyt*$W>Os^Fjk-r-eu`{0ZJbhlsOsR;hD=`<~eP6ScQ)%8fEGvJ15u9+M0c|LM4@D(tTx!T(sRv zWg?;1n7&)-y0oXR+eBs9O;54ZKg=9eJ4gryudL84MAMsKwGo$85q6&cz+vi)9Y zvg#u>v&pQQ1NfOhD#L@}NNZe+l_~BQ+(xC1j-+({Cg3_jrZ(YpI{3=0F1GZsf+3&f z#+sRf=v7DVwTcYw;SiNxi5As}hE-Tpt)-2+lBmcAO)8cP55d0MXS*A3yI5A!Hq&IN zzb+)*y8d8WTE~Vm3(pgOzy%VI_e4lBx&hJEVBu!!P|g}j(^!S=rNaJ>H=Ef;;{iS$$0k-N(`n#J_K40VJP^8*3YR2S`* zED;iCzkrz@mP_(>i6ol5pMh!mnhrxM-NYm0gxPF<%(&Az*pqoRTpgaeC!~-qYKZHJ z2!g(qL_+hom-fp$7r=1#mU~Dz?(UFkV|g;&XovHh~^6 z1eq4BcKE%*aMm-a?zrj+p;2t>oJxxMgsmJ^Cm%SwDO?odL%v6fXU869KBEMoC0&x>qebmE%y+W z51;V2xca9B=wtmln74g7LcEgJe1z7o>kwc1W=K1X7WAcW%73eGwExo&{SSTnXR+pA zRL)j$LV7?Djn8{-8CVk94n|P>RAw}F9uvp$bpNz<>Yw3PgWVJo?zFYH9jzq zU|S+$C6I?B?Jm>V{P67c9aRvK283bnM(uikbL=``ew5E)AfV$SR4b8&4mPDkKT&M3 zok(sTB}>Gz%RzD{hz|7(AFjB$@#3&PZFF5_Ay&V3?c&mT8O;9(vSgWdwcy?@L-|`( z@@P4$nXBmVE&Xy(PFGHEl*K;31`*ilik77?w@N11G7IW!eL@1cz~XpM^02Z?CRv1R z5&x6kevgJ5Bh74Q8p(-u#_-3`246@>kY~V4!XlYgz|zMe18m7Vs`0+D!LQwTPzh?a zp?X169uBrRvG3p%4U@q_(*^M`uaNY!T6uoKk@>x(29EcJW_eY@I|Un z*d;^-XTsE{Vjde=Pp3`In(n!ohHxqB%V`0vSVMsYsbjN6}N6NC+Ea`Hhv~yo@ z|Ab%QndSEzidwOqoXCaF-%oZ?SFWn`*`1pjc1OIk2G8qSJ$QdrMzd~dev;uoh z>SneEICV>k}mz6&xMqp=Bs_0AW81D{_hqJXl6ZWPRNm@cC#+pF&w z{{TT0=$yGcqkPQL>NN%!#+tn}4H>ct#L#Jsg_I35#t}p)nNQh>j6(dfd6ng#+}x3^ zEH`G#vyM=;7q#SBQzTc%%Dz~faHJK+H;4xaAXn)7;)d(n*@Bv5cUDNTnM#byv)DTG zaD+~o&c-Z<$c;HIOc!sERIR>*&bsB8V_ldq?_>fT!y4X-UMddUmfumowO!^#*pW$- z_&)moxY0q!ypaJva)>Bc&tDs?D=Rta*Wc^n@uBO%dd+mnsCi0aBZ3W%?tz844FkZD zzhl+RuCVk=9Q#k;8EpXtSmR;sZUa5(o>dt+PBe96@6G}h`2)tAx(WKR4TqXy(YHIT z@feU+no42!!>y5*3Iv$!rn-B_%sKf6f4Y{2UpRgGg*dxU)B@IRQ`b{ncLrg9@Q)n$ zOZ7q3%zL99j1{56$!W(Wu{#m|@(6BBb-*zV23M!PmH7nzOD@~);0aK^iixd%>#BwR 
zyIlVF*t4-Ww*IPTGko3RuyJ*^bo-h}wJ{YkHa2y3mIK%U%>PFunkx0#EeIm{u93PX z4L24jUh+37=~WR47l=ug2cn_}7CLR(kWaIpH8ojFsD}GN3G}v6fI-IMK2sXnpgS5O zHt<|^d9q}_znrbP0~zxoJ-hh6o81y+N;i@6M8%S@#UT)#aKPYdm-xlbL@v*`|^%VS(M$ zMQqxcVVEKe5s~61T77N=9x7ndQ=dzWp^+#cX}v`1bbnH@&{k?%I%zUPTDB(DCWY6( zR`%eblFFkL&C{Q}T6PTF0@lW0JViFzz4s5Qt?P?wep8G8+z3QFAJ{Q8 z9J41|iAs{Um!2i{R7&sV=ESh*k(9`2MM2U#EXF4!WGl(6lI!mg_V%pRenG>dEhJug z^oLZ?bErlIPc@Jo&#@jy@~D<3Xo%x$)(5Si@~}ORyawQ{z^mzNSa$nwLYTh6E%!w_ zUe?c`JJ&RqFh1h18}LE47$L1AwR#xAny*v9NWjK$&6(=e0)H_v^+ZIJ{iVg^e_K-I z|L;t=x>(vU{1+G+P5=i7QzubN=dWIe(bqeBJ2fX85qrBYh5pj*f05=8WxcP7do(_h zkfEQ1Fhf^}%V~vr>ed9*Z2aL&OaYSRhJQFWHtirwJFFkfJdT$gZo;aq70{}E#rx((U`7NMIb~uf>{Y@Fy@-kmo{)ei*VjvpSH7AU zQG&3Eol$C{Upe`034cH43cD*~Fgt?^0R|)r(uoq3ZjaJqfj@tiI~`dQnxfcQIY8o| zx?Ye>NWZK8L1(kkb1S9^8Z8O_(anGZY+b+@QY;|DoLc>{O|aq(@x2=s^G<9MAhc~H z+C1ib(J*&#`+Lg;GpaQ^sWw~f&#%lNQ~GO}O<5{cJ@iXSW4#};tQz2#pIfu71!rQ( z4kCuX$!&s;)cMU9hv?R)rQE?_vV6Kg?&KyIEObikO?6Nay}u#c#`ywL(|Y-0_4B_| zZFZ?lHfgURDmYjMmoR8@i&Z@2Gxs;4uH)`pIv#lZ&^!198Fa^Jm;?}TWtz8sulPrL zKbu$b{{4m1$lv0`@ZWKA|0h5U!uIwqUkm{p7gFZ|dl@!5af*zlF% zpT-i|4JMt%M|0c1qZ$s8LIRgm6_V5}6l6_$cFS# z83cqh6K^W(X|r?V{bTQp14v|DQg;&;fZMu?5QbEN|DizzdZSB~$ZB%UAww;P??AT_-JFKAde%=4c z*WK^Iy5_Y`*IZ+cF`jvkCv~Urz3`nP{hF!UT7Z&e;MlB~LBDvL^hy{%; z7t5+&Ik;KwQ5H^i!;(ly8mfp@O>kH67-aW0cAAT~U)M1u`B>fG=Q2uC8k}6}DEV=% z<0n@WaN%dDBTe*&LIe^r-!r&t`a?#mEwYQuwZ69QU3&}7##(|SIP*4@y+}%v^Gb3# zrJ~68hi~77ya4=W-%{<(XErMm>&kvG`{7*$QxRf(jrz|KGXJN3Hs*8BfBx&9|5sZ1 zpFJ1(B%-bD42(%cOiT@2teyYoUBS`L%<(g;$b6nECbs|ADH5$LYxj?i3+2^#L@d{%E(US^chG<>aL7o>Fg~ zW@9wW@Mb&X;BoMz+kUPUcrDQOImm;-%|nxkXJ8xRz|MlPz5zcJHP<+yvqjB4hJAPE zRv>l{lLznW~SOGRU~u77UcOZyR#kuJrIH_){hzx!6NMX z>(OKAFh@s2V;jk|$k5-Q_ufVe;(KCrD}*^oBx{IZq^AB|7z*bH+g_-tkT~8S$bzdU zhbMY*g?Qb;-m|0`&Jm}A8SEI0twaTfXhIc=no}$>)n5^cc)v!C^YmpxLt=|kf%!%f zp5L$?mnzMt!o(fg7V`O^BLyjG=rNa}=$hiZzYo~0IVX$bp^H-hQn!;9JiFAF<3~nt zVhpABVoLWDQ}2vEEF3-?zzUA(yoYw&$YeHB#WGCXkK+YrG=+t0N~!OmTN;fK*k>^! 
zJW_v+4Q4n2GP7vgBmK;xHg^7zFqyTTfq|0+1^H2lXhn6PpG#TB*``?1STTC#wcaj3 zG~Q9!XHZ#1oPZo zB6h(BVIW5K+S@JG_HctDLHWb;wobZ0h(3xr6(uUspOSK0WoSHeF$ZLw@)cpoIP|kL zu`GnW>gD$rMt}J0qa9kJzn0s`@JNy1Crkb&;ve|()+_%!x%us>1_Xz|BS>9oQeD3O zy#CHX#(q^~`=@_p$XV6N&RG*~oEH$z96b8S16(6wqH)$vPs=ia!(xPVX5o&5OIYQ%E(-QAR1}CnLTIy zgu1MCqL{_wE)gkj0BAezF|AzPJs=8}H2bHAT-Q@Vuff?0GL=)t3hn{$Le?|+{-2N~`HWe24?!1a^UpC~3nK$(yZ_Gp(EzP~a{qe>xK@fN zEETlwEV_%9d1aWU0&?U>p3%4%>t5Pa@kMrL4&S@ zmSn!Dllj>DIO{6w+0^gt{RO_4fDC)f+Iq4?_cU@t8(B^je`$)eOOJh1Xs)5%u3hf; zjw$47aUJ9%1n1pGWTuBfjeBumDI)#nkldRmBPRW|;l|oDBL@cq1A~Zq`dXwO)hZkI zZ=P7a{Azp06yl(!tREU`!JsmXRps!?Z~zar>ix0-1C+}&t)%ist94(Ty$M}ZKn1sDaiZpcoW{q&ns8aWPf$bRkbMdSgG+=2BSRQ6GG_f%Lu#_F z&DxHu+nKZ!GuDhb>_o^vZn&^Sl8KWHRDV;z#6r*1Vp@QUndqwscd3kK;>7H!_nvYH zUl|agIWw_LPRj95F=+Ex$J05p??T9_#uqc|q>SXS&=+;eTYdcOOCJDhz7peuvzKoZhTAj&^RulU`#c?SktERgU|C$~O)>Q^$T8ippom{6Ze0_44rQB@UpR~wB? zPsL@8C)uCKxH7xrDor zeNvVfLLATsB!DD{STl{Fn3}6{tRWwG8*@a2OTysNQz2!b6Q2)r*|tZwIovIK9Ik#- z0k=RUmu97T$+6Lz%WQYdmL*MNII&MI^0WWWGKTTi&~H&*Ay7&^6Bpm!0yoVNlSvkB z;!l3U21sJyqc`dt)82)oXA5p>P_irU*EyG72iH%fEpUkm1K$?1^#-^$$Sb=c8_? 
zOWxxguW7$&-qzSI=Z{}sRGAqzy3J-%QYz2Cffj6SOU|{CshhHx z6?5L$V_QIUbI)HZ9pwP9S15 zXc%$`dxETq+S3_jrfmi$k=)YO5iUeuQ&uX}rCFvz&ubO?u)tv|^-G_`h$pb+8vn@f z7@eQe#Kx|8^37a4d0GulYIUAW|@I5|NIh%=OqHU{(>(UhKvJ}i_X*>!Geb+Rs0MWf66Lf z-cQ(4QOENSbTX$6w_9w4{5eR?14#?)Jqf2UCk5US4bnz8!e>vFduH6(cZZ=5*_!M# zUTZ_b<4v@}dSQOcH@wt-s;3JhkVDct$6k9!ETdi-tplkaxl^qF=p}Q8KMVm+ zeIa2q?RYr}nM0d_W2YWv%JKyCrGSePj8GrRN)<$Nsq8l$X=>`W;?>0eME3|8t&d$~ zH`XG45lBh>-te_f0Mh0??)=Ee0~zESx=sZPv<#!sAVv$0qTn@CmCUNJU<#=`GC)&P z9zuV~9*3_n2*ZQBUh)2xIi;0yo)9XXJxM-VB*6xpyz{Rx2ZCvFnF$2aPcYFG( zyXkO(B30?mt;5GW&{m^w3?!P`#_o;Y%P2z^A`|4%Bt2@3G?C2dcSPNy1#HMXZ>{+L z3BE#xvqR@Ub}uKfzGC=RO|W%dJpUK#m8p&Dk|6Ub8S+dN3qxf9dJ_|WFdM9CSNQv~ zjaFxIX`xx-($#Fq+EI76uB@kK=B4FS0k=9(c8UQnr(nLQxa2qWbuJyD7%`zuqH|eF zNrpM@SIBy@lKb%*$uLeRJQ->ko3yaG~8&}9|f z*KE`oMHQ(HdHlb&)jIzj5~&z8r}w?IM1KSdR=|GFYzDwbn8-uUfu+^h?80e*-9h%Nr;@)Q-TI#dN1V zQPT2;!Wk)DP`kiY<{o7*{on%It(j0&qSv=fNfg3qeNjT@CW{WT<)1Eig!g9lAGx6& zk9_Zrp2I+w_f!LRFsgxKA}gO=xSPSY``kn=c~orU4+0|^K762LWuk_~oK{!-4N8p8 zUDVu0ZhvoD0fN8!3RD~9Bz5GNEn%0~#+E-Js}NTBX;JXE@29MdGln$Aoa3Nzd@%Z= z^zuGY4xk?r(ax7i4RfxA?IPe27s87(e-2Z_KJ(~YI!7bhMQvfN4QX{!68nj@lz^-& z1Zwf=V5ir;j*30AT$nKSfB;K9(inDFwbI^%ohwEDOglz}2l}0!#LsdS3IW43= zBR#E@135bu#VExrtj?)RH^PM(K4B`d=Z6^kix`8$C1&q)w1<&?bAS?70}9fZwZU7R z5RYFo?2Q>e3RW2dl&3E^!&twE<~Lk+apY?#4PM5GWJb2xuWyZs6aAH-9gqg${<1?M zoK&n+$ZyGIi=hakHqRu{^8T4h@$xl?9OM46t;~1_mPs9}jV58E-sp!_CPH4<^A|Q5 zedUHmiyxTc2zgdxU?4PyQ{ON@r+Ucn1kjWSOsh6WzLV~Bv&vWLaj#Xz4VSDs*F#@M>#e^ixNCQ-J|iC=LcB*M4WUb>?v6C z14^8h9Ktd1>XhO$kb-rRL}SFTH)kSu+Dwds$oed7qL)Jbd zhQys4$Uw~yj03)6Kq+K-BsEDftLgjDZk@qLjAyrb5UMeuO^>D43g%0GoKJ~TO0o!D z9E$WfxEDFTT?~sT?|!7aYY*mpt`}i;WTgY|Cb4{Cscrmzb(?UE+nz1wC3#QSjbg>N zleu?7MGaQ&FtejK#?07Uq$vIZX5FqR*a=(zUm`Fq$VUl){GQ{2MA)_j4H$U8FZ`=A z&GU_an)?g%ULunbBq4EUT7uT=vI6~uapKC|H6uz1#Rqt$G(!hE7|c8_#JH%wp9+F? 
zX`ZigNe9GzC(|Nr8GlmwPre3*Nfu+ zF=SHtv_g@vvoVpev$Jxs|F7CH`X5#HAI=ke(>G6DQQ=h^U8>*J=t5Z3Fi>eH9}1|6 znwv3k>D=kufcp= zAyK#v05qERJxS_ts79QVns}M?sIf(hCO0Q9hKe49a@PzvqzZXTAde6a)iZLw|8V-) ziK`-s)d(oQSejO?eJki$UtP0ped)5T1b)uVFQJq*`7w8liL4TX*#K`hdS!pY9aLD+ zLt=c$c_wt^$Wp~N^!_nT(HiDVibxyq2oM^dw-jC~+3m-#=n!`h^8JYkDTP2fqcVC& zA`VWy*eJC$Eo7qIe@KK;HyTYo0c{Po-_yp=>J(1h#)aH5nV8WGT(oSP)LPgusH%N$?o%U%2I@Ftso10xd z)Tx(jT_vrmTQJDx0QI%9BRI1i!wMNy(LzFXM_wucgJGRBUefc413a9+)}~*UzvNI{KL# z_t4U&srNV|0+ZqwL(<}<%8QtjUD8kSB&p$v^y}vuEC2wyW{aXp2{LTi$EBEHjVnS# z+4=G$GUllsjw&hTbh6z%D2j=cG>gkNVlh|24QUfD*-x9OMzTO93n*pE(U7Vz7BaL% z@(c!GbEjK~fH}sqbB1JNI!~b+AYb5le<-qxDA9&r2o)|epl9@5Ya7}yVkcM)yW6KY7QOX_0-N=)+M!A$NpG? z6BvZ8Tb}Pw(i9f7S00=KbWmNvJGL(-MsAz3@aR~PM$Z>t)%AiCZu?A|?P*~UdhhFT`;Nb)MxIg*0QlkYVX+46( zSd%WoWR@kYToK7)(J=#qUD-ss;4M&27w#03y6$gk6X<-VL8AJM@NFTx#Z!n)F5T357%njjKyjro(yW8ceP{!%;*Y>DN`&_18p(z2Hg$%K zohbgJcp%+ux%q6F?(sc_mYJ<$;DxgkTEi?yjT6Du@+n(KsKtFHcO%7O z=AsfLSTdE2>7a@0^`;)?Fg|s2XOPV&fo<%Q)Izaw4s&RvrX0^+aPNq|yE?oSa7 zsnNs!+vGcTM4yM|$9so*2Nv;ngDD}b0MjH6i4e|l^O`lzCRj)-qa6f%|afJpmf(S1J2k7Nt^!;Q}0 z4ejPF?^M~Sv+@LYn&IFUk2;1h?kb8lfrT`oMm=JBm{fo5N|HY~yQQ`T*e2?!tF%*t zf+ncx15$NdF82GXrpP5rJ7!PVE3>u`ME$9Hw5RlP zUh+s#pg{9kEOsAhvu2pry#@dvbB3Lti+9VkLxPZSl;fNr9}wv1cTahUw_Py7%Xp;C zaz__|kz*ydKiYbsqK{?cXhqR(!1KMoV-+!mz>3S8S`Va4kD#(aKyqecGXB^nF*>mS z1gG>fKZc?R~Tye>%x+43D8=e zf0eKr-)>VEu7^I{%T}BT-WaGXO3+x<2w2jwnXePdc2#BdofU6wbE)ZWHsyj=_NT3o z)kySji#CTEnx8*-n=88Ld+TuNy;x$+vDpZ)=XwCr_Gx-+N=;=LCE7CqKX9 zQ-0{jIr zktqqWCgBa3PYK*qQqd=BO70DfM#|JvuW*0%zmTE{mBI$55J=Y2b2UoZ)Yk z3M%rrX7!nwk#@CXTr5=J__(3cI-8~*MC+>R);Z)0Zkj2kpsifdJeH)2uhA|9^B;S$ z4lT3;_fF@g%#qFotZ#|r-IB*zSo;fokxbsmMrfNfJEU&&TF%|!+YuN=#8jFS4^f*m zazCA-2krJ-;Tkufh!-urx#z*imYo|n6+NDGT#*EH355(vRfrGnr*x z5PWMD7>3IwEh=lO^V>O>iLP~S!GjrvI5lx<7oOg(d;6uEFqo5>IwptBQz;`>zx`n$ zjZQ#Hb)qJdQy#ML&qcfmb$KT+f_1#uYNo7HHDY}7xAw8qbl;9LWO-cndfI=5$%jBw zb}K3U%88Fg^|&0Vc~99bKl|$3JzdawRZ|`7%1S<8B7>9*rWAT0U<@mHDfnL1`~1U| 
zDw7m@<@}C|zqeHM(OK@di6~sKHiJvk^I0^S<LBe^_xZsUOzVkYSE)Bxn*NekQYbyTn5SRt!n{EseOo-$u)vjM(PV%6cIG3Kv$>dd}HUyXi;_Lv>}OyUj38dPe8+1Pr?{LXnIBCoTnocD60@vhsz+GG5lJB9ncgP8T6@LwuzZ)J zKETBS~AvzGE!{u^+Rd-|Gn!rc@UUnioP0{@_j_>tg8YI#?y zL-H$=&xXkCJ2Qe7&exbI!z`OyPxBp|4_ zZrrc;OAb%T4Ze%7E}FBB`8t$QN0sA3vpwU>?7QAmE%-ethXdCtby$Qm3v$lNxB2a7 ze6F5eEWV`={#W(G)Va}7?$D65WF|f0nmfZT;?=LE6Yz{{W3CV2h^Ma+LXdZ(HMVKZ z!YXJ*34lo!FA>)jSo@*!Hs_)IwmTo6pBr3c^j2u_amZ~g;&Z2jZIw!}v@w8DtZz7|A%rFksD4^HYB!xFAqX;u0HxPeG!3Z(z z4}+^N5-nckKf2YSR5R_}PD+2?Wq#BOiON74#{`u=4f59WKdy_77EYq~_|X6cNtno{ zZ?WLwbV57Z6uI|uY_;vzv~~`eiiOl($Au7C*X<&MY5v0b`KEu-GW}{2UNfmmrP!^Y zAOczy!}TIJsom=}kxH)9W`&Rp&rR6T7y&~5nXbut;wcs@M?aa^9j{ZDtx=1?P8TV{ zee2kKf%CE$mogyKKT=xQQ#)OCl9bjc)}{p2X$}aG`^B0w0yi-rI!d4e-u9uR$kJK3 zhqBG9Wx<-3DFw5olJ6neF@hB;8o(r(GB_;p1i>}cjN`JNEZg-dlxtLL=8~gfLrBy_ z1~bGh{I>_xqh(}?%bCf1U6~K@+N*i}bTi+pUAW)oM0`D*PeJq=S(-|Plxe9OqxBRg zM((r)xkSH@j!8@+=cA4US0fDL&O?W~x=Mlu>7zvHO2sy7D5_7ulP+YMecP~}F0b*K z3oO2j{o&WHd<&UWcyA(&6hvBJv}qUZ!@R<(mwKB^;y3zeE1>LzbDWSkRD1|5MZPx( zxd=&MsQi1eE@@6W+4N`cF?yh!3R5JlAV--&RONWQ#?SbrQ95<@ag>C{jQmGXpQX{) z1dbFg1_`qLxuDZnX#PKfCW*Jl3F&^7@gO&{>Nb8um$VBcF1!AL=N6`A%BFj=`QaPI z+m^`n+{o)KLif;Gt|7aQ(XXRP@x)jJt}s{&S`I3}jPTY>$@W0BD3Oif^ehs~!H7T1FUSWxLS&W;0q6+azjbWn?3!q$ z9qbmdr4H4Y)p^NOACJ^L>u}NS8T0_5hW)G z%Hv}dAqM}d@t;|hf8>+NHHPi*xePsRlqr46njzhiXXZti7i5+GTKcrlxA->OJ9*Pna`02EIA5~(SMV`T@H6F2VtwwP1$tYujbC1^VE$Yd&I`WSwB^1( zT7NP3|85z#R%&wktjwY_i*n_$RRZPM^ota{LPV%*>=>sAv%fn*cnkCIX{^SJRmwZv z!?f@T&D%Lz@*!mNYTGp{J|7)~PR*ib`;l^E)rQw@)Qn0ECnB8W1S_SbLZWdqcmo?V zX5g0_3qhn4TrN27^x#Qdq*4*G1L|)I^b8GuP_8O{p|M`uvZO6McXa>OSQRW|kQTNPZ#Zyj~SZ<`6B)Y+}jxpn+YT>MhZ!Rxyd@rU>N zP>MkDBLX|<)SJaO?Ge=!D>i+Wq&PgneO?ZXUq4IQuTq z+V{ZGkuw77o~o$!b>4ov`6CKJ)$cf=S6%1ZQyYU!kz_qiuNxY2*Bh;K9J6o_YV6xQ znW|>x+#Mymu&wF9P|3wP*(ZjwE+ou|{eFqMv}d_iEyH zQ?NSf3VX+EpbrIKmp|oD-t_rh(D#e)fp)dYbG{=yPj-3-#l+iu7r+~#w|(#wv@G0` z38`Yhf5CznhyDEhD;jzaz7fc8L?(n-m zR#|5hqq#yRoeTm+h^9J42mnB>BY>HSu&&O-Hxo6j!dqck)dGS&odS@Hsk2-*Z~x 
z0!%{@gT645S5DeF@JZeE$DFl*nJB8Z|JKvs%7d`KjbJ*AsA_=fEZ&V9=*+K{(TF^( ztjjYr(7@fV^tDs9c*#=8)ZRKO17A5Z`8v*)U+?hS>3sEfgh3`#vFO^7n}&&adV?}n zdy&BY1h|I@eBm=l*kqiJn>vNkOH4l$Op5Hw3K_w8lF!6T@-H)S2W|Km#6!-X#NqLJ zsiVDrc%*@I3^Gen$)6O0C_qw;8{aucF;}U^1%YE`?AYTtb`Z$B$vfhcHQF`VCB(Pf z_G#fV*Colv-k!O+=^nDNe(03?m+RTu&28d%>JrrwFNb{ND&?Ad(=DP@voz$usk1|w z&#gTB7F)#*LtY6@pIb(g72*LcnXRlTPQAD?)ZFnB*EsZqxM&Uk_KGXnR{4}K`I6i- zU9}R>tiO0De1Hx=kAy>7O+nKO@kGQEYOai&S9&WTY+flvR?uhI695W-xZnq4aRMh8 zwfp)+KYWVB#r=5AwwlSdM4@x7-R_{2;1iqz2lXL$7iu1>5W*+I)jlkMs>60=LN)Y= zbPw;;%U+%p_&{2Obemh$BLmbpDd31YxJ8#TpH3~3B8QLUMvx1X5Vl48hWSNN*UTlO zQgQyZbmyjGC-s$3tnB z0mfKUu2+_c`ZVvDVwUy#j3W*l^BSXXQ%=r6Z}C73jx8DAk!t7k{dK^udpHIcUejp# zyx}og$Hr+f>9kaZvno*Om`d|VTUce9tHM=R8thoG!a=NT$s;g@n_rAN%cp7nnLuav z6}j56TSSfPL$p#y#!5TVyqa3zTzi7@#IoeR=E6CdS`JrR+@i2DwZ?T*bh+(k5!a)0 zgRdF93z8XJ|5?>hDN!YAW5cK=+BwDLNT_+otd zqC@*{S0hCKZ+TnN*2&qx+WP;ZjHA`yytPcwKl~)uy)sQ}Q*0-&3X|YFYAjmolaciq zxS$r5^fxICetD*Dw78M9leVvhAOZ$=;SP7L!Vs?+0f1h*YCuTXIt03iAf)0=0KEvZ zB69o-zg`0C#hQ>`4`}1g=a~EID(j9HbjJG^tV-zumR-+fahTPveA{%0u2uQwMZ%}5 zwY!|}i0oTd&>^QSRhIKU+cMC#|C3f>|647?v1B(wH)EWb{vuJEJh~!#|J7%=h!x3| zCH6m}wg;>Q&?@5Ct1%n`lj%*>9a52d@wmvE`=aQjtz$sWj3V;fDns5<7d2*``)u1( zh!Ub>!#N0m=Vz1n1=El zwb2IVRw$6NIFRpGyUoM0iqc$IPehcmm7<0s7F*Yv+zq?_%pf*SS~~}s0M`m(rMbx% zi?|Wjr6fJN`_J8&B2$4+V+iO~m>s~Zr2T3Y3HGREFQ%%pEoU0N));AeSVM#gYQ>l} z0`RhgS`R^pJH31YQ~eTeJiI}g$&^|nv{!h?8mJK{{XDt+sG8D`7)$jvM#hjPI(5sS zfFW4s7wao%Lo| z#pJRC?iZOai;57ANs|vm6%}rPlGo}}Aso1t#xJn}%VW@~1WSjh(@JTgM$0x6ZQ)gB zdiox3f>kqGZY}+R<;wlNoWJ8#X-v)1;wRD*ec*wnvsN06Q@cZuD`deT-Bu&G;2fBC z0FE1%pG@{Yo2O87&dE;w???%`9s1gs=3GpM8xx_}=AB$K9y=cD);^iE*p4;T1RU%B zBPr)yqOBX<2}xt%g9qr>;z&|?4vhhw7@$a}Uy2b%_^VdB^VfzrebKUPnq;hliCNU% zVt3R5EHkhN^Pv`REF+npA@#HdCQN9IbQbqSDs^+zt(A6;rLwN+@Em}WrV5vPEo!w^ zSCd3RZ8{7a@d9@|IF&&G%irS7FHle?@49LctrtTt=rP$W)se*#RkFmyf)D1^U6EYI zfh+N?uH?-))O$9zM19VsuGn8?o~5`scXU?!P@_cWP&1U4PQqGus=sQzrX+YvKG%XBL3nt6!&M<#}wqA;Mo(}qrq<1lNkpQD-T#-y>grt|E+JNU) z2j+g+QPcA9VEFc0k;H(hSNOpp$I+!$ 
z&d&W6kBM9+c{X%vr_X0}tdB5dvEDyk5H2*T(QW8Yz-#tjvF?up=^Kfym``^!&O-X! z@HdfpHn;}_)y$Xjb-5cR$Q#-XdhKpmJG5pl>h*Q2(u*gt_4(>6?kG)%T3*&TT0qI( zL!aR~4HiJiaHlgdNcOQP6xx1f3AWx&8}(NEps|G!cO>J^rE2@&-t#_Jb7GYgnLnML~1ze1D$?~BwbgA^=pr55tC|d7w42vN11_8bS75u z_MRKqE7Xik8fk>6(VE5{qT}6rSzd|o}Zb>*aI*Bwg%ccE$_ytH;g2H z^i3qY!+aE*&s^BMH9TI6GLm&9c`D6)3{-+?2Pon+040Yuv$2(LqV*krKhTg5CHOj* zquacxc1&~=S(O@gR8aI#?R%)meONmw1rub9E2QzeM$pBBm2wbPNR3tab{op53<oFwaUbARdD5jSA_6zmKX7!VicEP1m)rYnk{P- zruRj;4c8S29Rd#Baf|fq_pA^r3K#qRHS;($XNoLI*`puZjM?bA0tH>FDiVc9qR*|3 zGn#nhqxkvqFwRfCB~2yA0pxWapfjCdAem$utuon-`*6}mUP?l%$CE(FjAwL%Oe7GQbu7*+&q>*(cAofJr^gg>xw>hx-SO7Lx2)I} zJ)tV1XKbkE4sS&La#-smSq>S9gBzGLH%v?KVezdGv%Xs}kDJZJi{lDl(FpLZupBta z3iDlkd6LlkRro}+El?GIObw06D%NTXpL{W}Ve*%u#{wTC=+VHS%o`sAez&cYz|Tn` zcK_~pvN%cd^8FlFypCjTjw9@ulLoJ^!QAK*++^wC2~}CFeoY;q6y~r&f^+0>LR6)n z$hSev@GzzGgDc>)#u5_;{T9^5y5I?m=z7=J!eVId8p6R5>NV8)h|bA}#3KUufq4CPGiWYvGj%0=H@Q66);F)#cDMND4 zX|?rg>Bb28q*a!_sgVF(A=OeC&je$C4>$0%yy;Fla-hl(|9Ww4!@Q#E2hpJMMxpQ2L+R;+ZMpS+|j*F`Fh}p)`a_*<`AaeFzNEq^- zlF$7BFKD%p@K+3$Vx%N{QOayKKWU#JOAwXiLO62cA6=|DiDG_Z=ef;f&gQ5-?+Pb+ z)4NsyEZXCdjq5tgDN39V9!6#w25+R1;PD7ss;hFvQn}Hnl3^3h<`ylzJdVEL>|Jj0 zg>=Pscwx&;pWEzMn`ld**$1F-nhqlMuX;G{lWrT<<4$7MZ^*4a2hAMf)3eYiT$lRz&9({j<=%DWIRpgu zoOns@gF}AQ_6Y5RhySg7yMtJcYQap6^hgy{`zX1Zv26q4<)g@t%aIi|-lmcySuRN8*5f*$aEFi8o#kMKRCMnrAY~l`= zez#50^@Qo+6r508>iKfAbbc3JwCnjnmw;~=mlMG`(H8EJz7W6mh@mdinO&)#zHX=| z&|fo@s`;njVkkCMczSnp+TnW8YPU4w2&QmzEh1}orF~KlT=V+`!!rH|PtULCcL!P*m0EaN0Ad2qBw%Gs40jfu=%`N*k@z2-p?&B?Yum-p+h?7(!D^ z&f2Bn_#t!4HM2y^*1GN;U+_x8T$Z2>U9Yx;p_9Qf=ww z2hxO^*{%p9-CwMKz}C4mTi8xvqhivltE|}Kgq5MK@f6tBT&`@RYzsFFi>*eMZ0Z6Y zKBl`GOh!U%C+PXJ|7PF)V*~#8eS80D@v-NL2U&;i62W}k+vJAC+7xF`eq%c0b?{PVTcqiDr%6jLBdkVcTwLJSd313SP)1r=;2`cORbMzrhqZxMWcTWru5-l_H8;f|?{^M%%7>sU zGx2{fX*t;7SewS|NvPR-6F5p(ji7d}CK#%7y}jsPkgj%F5cUbQ?b7uWpYks^|DL*n zau%X$^(%wXMS3c;C4=p*#q>ahmLH5woLsn-YcZP~mH-rGnRyl#KU4MsLu+G3z90+q zM$HCWgZYR`8_I%8)SYuBltP$sN`-6hcjnzhDsVl+Y}yqMN*4MWsJX_6R>Cyw8cHGQ 
z1>r%vkDxxc#ACA4+-ZO|QBMUz`YHrS{l-*$> zi(n_;4{Gn+d2gn)TA<9) zibWdKJv#s_f5K}vM=d0NaYrd;5A+Fy^=+WgKC`@bS>!P5@K4fzE#VYfMcNdbbvLPY zeR~!f3xU>|pfq-LOsoF=t94x%K!8>#8tR4KQ2G3Yr?Cb98^KL*+G8``rHMpNUN}-T z5HGAkiLh{WR;N$Nk3X_2^3pW=vOFTOb(LS0Wu)0)I{8sZj>}5ZGtD=va-72l&5`L= zhyzBWie2UrC|?(sTcuk$OwvV4oVlxc3ncXPj|cD%%*6(hoKMd5wzPQs^6g)B0xK#d zemOodB7D(!@v!|eYqMfx@M#b+D)PwAuvimOW#13i-xAR5)Ai; zXNX(A@M*y&+TVZI zGHo$F*Ipg~Rnp`KlMNAl2o86}r%Yv9#!O-oo`pe`880;-Y28tR)b4H%nqXXHxN9m0 zI&#!(XhT=T3$WS$)K4#Y=ceN`MsP0v1X{nIoQ14S2^--MnUp21=V3&Uv8|y}^}7Vl zI5tRbOp#?@ay6uncZFE0hg}kt(k%piw^M8;0yynsK_!l~uP??IqzmKJMUqAW^GG{~ z7Fg)Q&zBlp z%Tj8jOUpuR>YHP6zYsX?)aJ`)_pRwu+Tn8I;brOW_`v$u$`$9T)cO*O$j=?mg>dW$ zw=&3=v||fqCr`-$okN*$S9(Nyrs}+Lu#IwDg2xSBz_VfU*?A&26vwv>&>*U_TT7-7 zS~X}fT%9+q(Xvc0qzOG^8gmMcZE9izi5feqvY(aY=%reP+wVZ&cRd`^y6}-gJ&_6n zR%Wdl3vQ4DOt!X9ry7j%=+7pLPdus*@7dZMBo0_WKZPD1(o{=;D> zyc9_WFI3{URv=d6EXcnOG0$(J(R#8Oz$kmuSFQ{-Y20}1027!FkodTU!fouSybwqn zRO-$2BH(w4)$wiPo<1w-4*p=Q0@YKRm^cgiA>~ho)U8^e>SBk*!@xvr0CdvnLHS#CACVuQfgzF>8qV znqf{oO1}RWhiZ3g!Tx9sk!JfLqcP`>Ksx#vZuLg-DC6h4mT!vlU zqw0`0CzZgY!EN0*{sQnDNFn;T<+e_x$zY|n;p0@d^hK*n!S!=#^;P{*D^6~h!T7r6 zoiMxtovMo-dj*{qZPy*c3gaMBEDQDkINU%d8HeBZVlRuzkCId9rx{?L= z-dLlk$w&JX5wn+8`mtqCpKnx+w+$@6DEUI}8P%xN$MEsw%S1-$9PM6r^jP-@?cS<# zhg$wl0X=s3{8EZ2U9(};p{X_b1@jJuGgx`gDK{6MpF|XON_=Rv%-<Ee1cuuy?nl9xVDa~x=+8ppnOQ9 zN$53qi4QQ!co(;f!#YJ8(=Z>_9UF#(QOVjS7T!g2)*Oecrf-R^)tFugBkQsMVNua# zS;1V^#fJS{h+!O+FgS%0=Pd9;lMa0QHn?-n(<0b2$<|@r>fjiyw6u*UoGmU$ayJM@ zfp;c4@{$b*Z_v9?8ZEp{m6Q(mDHW<``n?jg-ZN)Hhvxn*l=O1f*K%{5s77WCt!ugS?*2oG5-Q)JEJd0+W5=doeD$Wh?U$ZRg)K$v8cmQ{hba9jw_mF&X zi-dV?WITgIz!!0uB~jE?(t`&qo{WGyUspX| zc6+F2K4l5$LqxERF#`I&k^^opVIMZjGhsJ^vI0c%kV+|&_k>~}ueTtj;^Dfb@xHs` z)-39elzVA~D~n_aoyBQ1>Qd2!;E!G*pZM&RX`r*y)b`yxvP2;#vM*;CQGPg|gni)} z47`Log3PUyVfdmJ2zvHBhg7T#D-H=myzkeUa$@);WC(yB4k^*$wda3=S-UH5Q1Hx6 zPcGxMP&kXBa+4$s#Sw3-V?mlHj^8&bLpIN~GkYj;!;M!$ZxvtQY4j&Ngz_mxuQRqx zYTbN6epx@-!0jRV5yiSIJ<^mCZ<|;&x2~a)t+(eAVB!1XpCZok*Z2C5P7&>z-Oy?t 
zf@F(_FLsSrfCus61+Vt~svP%(u<4pzT5{w*0XqfPV%~|=%aq^$=*U+_trGQaoUxbt zBV#Yqx+ULku8yPJs4gGcC?+3iRt_6)Oi0DNLxdb(!n!cup_XUZ3eDe(!DChZ!IG&L?_;T-1GB!R;;Sk;l3Y*JQ!I|l20_f}ZyC;4D7R@6F z>%z~wV;Bj1b(*kp26Ed!Y-OKxNbt3%t))xxOrazWsmwvW;uaSaJ0ou+{01vXvU>_V z6Ha@+;giVaiyg`J8ENQf)Pq>!Nf22>XFHnXTNk84&jp-^YwmlUqnOll8)5mzlO$o! z#fSMwH8Pn+Fy7O5M5#ZGr$cKfaGf8g;XN)<*TrQjMk<}_oRf&b6qZoR38Q{Zxo{V; zby+J_hCZT1>`4~jnQxo|ji%BQ0=BLzC6c!1=B(jS5+fcp%q)JI)=c3{D|=k5;0&c2 zrbRE|qxkNqah2nvextOvjYA{T43n1c6eO7B9DH)tLqB46E7;0xKM=%#wx-*-+*OY{ zQ#7gMStz%I&2&rbo>#T20OD_#g`WYbt9+!MC08%zSMhqMoRk)7VOk%~`sD%(U6zzO zdmSC9@x0GCv2_)umYc5@#%efP0_cu+=f^}k$H9$N_>piA_(5UM_o{++8+Yf8SJ)?C zDd3l=GGm3EEy;&Z6N=+XP@IM0L=uW^ooyYQYyx1vwFR?@U~BAtAqTu%Mi2 zTCQh$K=UZA{P`Cw0I$xAh_f?fq-Goe`7I38{3L8?K3`lRhSAyB)tHT@4c!Y;bJAAS z3u>Q7qx>9SJs4$EB=hxh)u`W5jp?>^g1s_MV7<1zN zXt{FSt?Mt&8aCy67<)b@eg@h0iCW@%+pF-V>p${fyEk6_Gvp|ms{Whi-9eNId?xzZ zm|MI>F;JSuaUnQp#|}k3o&ddCZEeTI608txuU4~7K(wg9 zg%+}(7h2@(%>LI1F*puF(h$ZD`Q+ar!VoVajPY0-XS$>6F_F?sc6Mr7>SL-&{pC;2 zKx@2{@ULz7RCpaKg$iu2rcY+y*~qaPo0}^7T1K$_(NPS<1;V zTj8-xC%WvgDI_YYEG{bySvyO3M>XKY)oXgGG*eB{yDgNQ3s3)A~@n>!O#lNh0! 
z(-dqW#_z&mMfq#2+u61N`L^({4UoU8wE5`4c}{SGFzKb(BK8hM%cf_zj_HmC48)M& z398ICVJTGzBaz7K{L+Ew=;z^0xA``wbtPs`r+Wrb^_vzzhukq{;A`t&-ktzb zbqy`Z0#D6fdVAiodjF3J+qI*vu#=OCjiL4bIIXEf4?zmN7(H|+<+WfR7@7jrMx7FY z5*0X1enhay-q^M?j}3Pd^|U9(C3#CQU3=hlc~@y9@NQD{UZNfC^5?Cuuuu{ebn_<7 zEzudv*b@QP%)N^5jP;86nQGb<*SOytCM5wmf-=rH#K{Wd$2(X#S$jF}XIxZC1)zir zU2Wq>hIB44nCTqx2x<{_wiVzLSJR}L%P!Y|lFHtA_=bDj=OqvmmSZ}ffuqPge#V-f zZDk|XX0RK}=73LxL`H%OXxK*^I2!fp&kxatErK~&tM3@j1a(Yrq$z)R()i?}p|0^Y zhW&8!IpRA1jJ3e!p66ZY=eBmEA+$A`!%s+{Cz!s$IA`{_Dh0^jt!vn;+Nw}hx019Q z_Wg=#-G-~&@>l=&H~48$L8`LX)!Bcq%(DFa2Loc91u@WcwlHzJwo{cdur>bQ;{fr_ z`rC5QRQ_)`8EadJzz-{K&sUI~>NX>P|c4l)fKS0gkuGe_P ziaQy!%CK(CtAwj-J8&#kyU=G(k%3y`!gS9dU&1xIrGRL|!&aVMEaezUIpopoET~xE zp`%~`LZfn!Lu^+00?>v4UOfM!HeeQoLZP<#o`^9oi69|$0BM?n17R~tGpY)eJiv@$ zTV-~ZZ*}C1J{a}p`>l$Bx8qRBq91;dLdmp84auzmcd|XzJG%I|r z^E-8Tm~jRn_>as(R=@~z3I2E3<=#hXn>A=0`wfOGIxiP)N2%!cG?&^w=E#TR z`lSY@Mm36zu4p3}+S#67MpL$d{gf@dnP%*ZMW=gCXK-%0E(xAC!^+b7hCSMF$m;Rn zCTErbBK#;a)>kHX5}w6PRmnw(!Gy>m_g*2opfklHyx>eb1bu|_lwJdf!ogxhk}X^v zc+^L;F7ta!8+i%6?M}XvQn4b%aOSCpDW+4#JDDG(wvXC*9%9(XBhbv4LX3R5G&(+@ z)nbdivYRQ5pW;9~@YGf{h~Rm(@MfV8Tj&T@EejO6(C#(+z7FVNBR`@j!#wScHM5ki%j+^GykUJ2m zYgpwm;#Q)~LoozUSV($?r3vQ~#ZU_}ggl~J%z*1dYt_^4K6e7o&qs_ORz{km+D+^a zqDdUO)d}|)v9h(Zz3}#DLWyRVCY!=PMCO{=PA)Upb@)1j?c)||l{6&pI=;U#bS#Jk zOOiwVH3FM!SuJDIPnN$|ZKz5fQwHmzn8f^?B+T2ew%~PSE#X_jk`Wu;a{4}9%AHg7 zZm8^bAee$bdpwklIE`$fV15=pI+tgJpll4uQjIM;Q!gvISFc_{@=lUSc-lABE%U?+ zHW$;!NcH1&F;AS~7RH=n<=!NTKnm3t`B@YeL?8d2{WGrmSjG;yBbY*9$N&DT^e?l2 z|1A2482Or7n7KF_TpRn|nmqD}`-=?QJ0z5q$C9Td^sML&aN7OGi+W$uYjDXKJg+0W@S=FoQP2dBI=48|FH>p2mh zFrdu!AwoG$NkvnZp_KT8HEo=RNNJ4IxucGXLr2N*I5Ao>Efb+pNOm9Zw0_7_s|9ac zS6}W##>$W*cBmksip;43p#a4&iTpM)8(gRGekW+AKm5zb)xpUFT>~b+FOH`Zs!$RDgpSCE z>;CL8Uu|EWeR~TvgDX@K=mtReFed;FZ!M2SjzW35i;UqfyemM?rq5yZS#hK5Y~|wt z2#^`Q6$b~uGT_++C3+B~#(oFHdSL&hh`Z8{t5#=ZkoaWVJoLm)3vT_@5HOnZGa;s~ z;4=E`3Eo@=$BxFjS`Iu|8SALB`<#TPTeE%h(dol+#CzJ=Zb&EHpw*=0H*~8x6 z`G`b<@>L2(AS*J!NVp`DN{g!8R#h(~URslf 
zC8PwGM$5V}+$WcoT*C~*$WmCpS6Gis&sZo|9OfRiwjX$f*&25Gjv6$YPde1smwGw( zb@y=gbl1!8>hm-il3&~zFca0~aJN!?b97+$E>2$Gn$31OR&UnE=Tm= zH44$Dx2HNN1lrCGjfuwo@+(m2j85w-oxre9FopupEV+6HACFyTbt}s-`lCCJ8om5RIE~T#Yg_DWu1u zyAp%jp;3&%D4;CRaR6g=f*ZvPqw2BadP=*ZYy_~CV3@wFx5YA(E8)jfqx z8tjEkMf>msMqi)zaY2fWrMq`lZzZdiMcluc(@(yxK(4hPEFk0~HO3^CUZk3;?Tv3` ze-rjZ8@hBrVPzA$^4hW?<33{d2)h7Jw?$t%V6(C_m+bNhXl9vXCJcBWmMeQoLDm5b zt9|A5pDHY#Y@(rlEo_WzXila!uaZE*WVc`=IM)SSc`#liZ2Wt*~fHgm9uH^ISX2d@)XGZ)_$qnbx6?J<14_=SS(ITs#LPDk03a&%x;bAuGz=P ze^<4p@tD@J|M;88;~IsEOPpB+&3C4!3q;}Kk2tb*WuuE z2u(BE$1(2AwbbBrmU-YLI4>#K((6&QZ~m2Yp;I14x0N8hos}{uoQuMG)Wy?ogaNayqmc&`I=8y6&dPf{Fky#B7 z#F=Xy213s`NFxjKuMqH3+ibWsFRi=QtH*j$9^)Zy8F|^vSmgj~l5<04MiU;BNyAn) zlM+c20Y#%@>WgdY>5kx}H)7*!D~BZJdg8d5iHx|>(jj=!MEmr)-$kH8?A#;DyBone(uz;e^|=9nIwfuWY?yw; zC|H`;8#O$vTPm5AW1Gg-Up&#Ca$<@!JZkAUDbmd*?X}QSA5$(*c+FZ|l+}F%*L1OH z{ck}P=j@=7>6ga#cqzj|ODXHD>ckIBmOd9Fh=~>?C7$uII_3rEX%UKdywsInR~{t- zg|t`~l=L1P_QPkZN53Q>!^A*QDZ zK(f;%VVQo)n1bsy)LWL#?&|wN`hL~Rnxhd3d-bOvlRQAiybH&=i;SlnwP$3P-!%x3^o)t6aoT-zXU}ARq-l^bOW-zg$@b|19Aua zF+k$V!uO;fNwCUEi;6!|5?4_MKtTq}|C`2gXh8EhWP1bTgZ)DqHZ&-x|E2*6Ka!RZ zS5jsHN&IW7%g1yUln@bn$cO!hR2b+`P~1-3dFIx!6EltRa{a z6Z@Y$_ug)~d%u)K$+?LYfc<87}bupdiK(3|m%hiA$Pc>zKNP0hqBj{X*L0rm@j(0s(f>>t{1L0?w#rS+#E)IdBKcF5|Dq-S zZ*-X3x;NeSuOSxS<3Q%uy1zwQ+?Kj&)Ou~-|2+&J{Zi^T=lx9+&+B^K_lQ;hY2H6D zeZ9T!H&;?$+kt+MLCs%i{8QEVi8<(Pft!mFt`}r~k5Y%93jAjQ!fgoD?Zh|Vi~q5A z27G^+_!lc1Zfo3}625-J{(B@p`IW|R4(!c|yX*Pn?*SA0)3iUGUB11uH>ab1{F$$g z|7q4=O#$9cezU54J)`wKI1_%J{14{0Zj0P3wEcKU`%-=?@(1PW+Zs0qGuI`%??IID dD~*3C;60WFKt@K_BOwYX49GZ$DDV2e{|AYb(KrAA literal 0 HcmV?d00001 diff --git a/java-sdk/gradle/wrapper/gradle-wrapper.properties b/java-sdk/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000000..bc660c8e3d572 --- /dev/null +++ b/java-sdk/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#Mon Jan 19 21:06:09 CST 2026 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/java-sdk/gradlew b/java-sdk/gradlew new file mode 100755 index 0000000000000..adff685a0348c --- /dev/null +++ b/java-sdk/gradlew @@ -0,0 +1,248 @@ +#!/bin/sh + +# +# Copyright © 2015 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. 
+while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 
+ # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/java-sdk/gradlew.bat b/java-sdk/gradlew.bat new file mode 100644 index 0000000000000..c4bdd3ab8e3cc --- /dev/null +++ b/java-sdk/gradlew.bat @@ -0,0 +1,93 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 
1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:execute +@rem Setup the command line + + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/java-sdk/sdk/build.gradle.kts b/java-sdk/sdk/build.gradle.kts new file mode 100644 index 0000000000000..08d0b9c9639ef --- /dev/null +++ b/java-sdk/sdk/build.gradle.kts @@ -0,0 +1,125 @@ +import org.jetbrains.kotlin.gradle.tasks.KotlinCompile +import org.jlleitschuh.gradle.ktlint.tasks.KtLintCheckTask +import java.time.ZonedDateTime + +buildscript { + repositories { + mavenCentral() + } +} + +val airflowExecApiVersion: String by project + +plugins { + kotlin("plugin.serialization") version "2.3.0" + id("org.openapi.generator") version "7.19.0" +} + +val constantsDir = layout.buildDirectory.dir("generate-constants/main/src/main/kotlin") + +dependencies { + compileOnly("com.github.spotbugs:spotbugs-annotations:4.9.8") + compileOnly("javax.annotation:javax.annotation-api:1.3.2") + compileOnly("org.apache.oltu.oauth2:org.apache.oltu.oauth2.client:1.0.1") + + implementation("com.fasterxml.jackson.core:jackson-annotations:2.21") + implementation("com.fasterxml.jackson.core:jackson-core:2.21.1") + implementation("com.fasterxml.jackson.core:jackson-databind:2.21.0") + implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.21.0") + 
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.21.0") + implementation("com.squareup.retrofit2:converter-jackson:3.0.0") + implementation("com.squareup.retrofit2:converter-scalars:3.0.0") + implementation("com.squareup.retrofit2:retrofit:3.0.0") + implementation("com.xenomachina:kotlin-argparser:2.0.7") + implementation("io.ktor:ktor-network:3.3.3") + implementation("javax.ws.rs:javax.ws.rs-api:2.0") + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.2") + implementation("org.jetbrains.kotlinx:kotlinx-datetime:0.7.1") + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.10.0") + implementation("org.msgpack:msgpack-core:0.9.11") + implementation("org.msgpack:jackson-dataformat-msgpack:0.9.11") + + testImplementation(kotlin("test")) +} + +openApiGenerate { + generatorName = "java" + library = "retrofit2" + + remoteInputSpec = "https://airflow.apache.org/schemas/execution-api/$airflowExecApiVersion.json" + apiPackage = "org.apache.airflow.sdk.execution.api.route" + modelPackage = "org.apache.airflow.sdk.execution.api.model" + invokerPackage = "org.apache.airflow.sdk.execution.api.client" + + generateApiDocumentation = false + generateApiTests = false + generateModelDocumentation = false + generateModelTests = false + + // The spec on arbitrary mapping (e.g. 'extra') causes the OpenAPI generator to output JsonValue. + // We should probably fix the spec instead, but this should work before that. 
+ // Suggested fix: + // type: object + // additionalProperties: true + schemaMappings.put("JsonValue", "java.lang.Object") + + additionalProperties = + mapOf( + "dateLibrary" to "java8", + "openApiNullable" to false, + "serializationLibrary" to "jackson", + "withXml" to false, + ) +} + +sourceSets { + main { + java.srcDir(layout.buildDirectory.dir("generate-resources/main/src/main/java")) + kotlin.srcDir(constantsDir) + } +} + +abstract class GenerateConstantsTask : DefaultTask() { + @get:Input + abstract val airflowExecApiVersionProp: Property + + @get:OutputDirectory + abstract val outputDirProp: DirectoryProperty + + @TaskAction + fun generate() { + val dir = outputDirProp.get().asFile.resolve("org/apache/airflow/sdk/execution") + dir.mkdirs() + dir.resolve("BuildConstants.kt").writeText( + """ + // File generated at ${ZonedDateTime.now()} + package org.apache.airflow.sdk.execution + + const val AIRFLOW_EXEC_API_VERSION = "${airflowExecApiVersionProp.get()}" + """.trimIndent() + "\n", + ) + } +} + +tasks.register("generateConstants") { + airflowExecApiVersionProp = airflowExecApiVersion + outputDirProp = constantsDir +} + +tasks.named("compileJava") { + dependsOn("openApiGenerate") +} + +tasks.named("compileKotlin") { + dependsOn("openApiGenerate") + dependsOn("generateConstants") +} + +tasks.named("runKtlintCheckOverMainSourceSet") { + dependsOn("openApiGenerate") + dependsOn("generateConstants") +} + +tasks.named("test") { + useJUnitPlatform() +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt new file mode 100644 index 0000000000000..56a4f0f65343c --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt @@ -0,0 +1,18 @@ +package org.apache.airflow.sdk + +class Bundle( + val version: String, + dags: Iterable, +) { + val dags: Map = dags.associateByDagId() +} + +private fun Iterable.associateByDagId(): Map { + val dagMap = linkedMapOf() + for 
(dag in this) { + require(dagMap.putIfAbsent(dag.dagId, dag) == null) { + "Duplicate dagId in bundle: ${dag.dagId}" + } + } + return dagMap +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt new file mode 100644 index 0000000000000..2da6b34c19829 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt @@ -0,0 +1,40 @@ +package org.apache.airflow.sdk + +import java.io.File + +/** + * Build-time utility that inspects a [DagBundle] implementation and writes + * dag_ids and task_ids to a YAML metadata file for inclusion in the JAR. + * + * Usage: {@code java -cp org.apache.airflow.sdk.BundleInspector } + */ +object BundleInspector { + @JvmStatic + fun main(args: Array) { + require(args.size == 2) { "Usage: BundleInspector " } + val className = args[0] + val outputPath = args[1] + + val clazz = Class.forName(className) + val instance = + clazz.getDeclaredConstructor().newInstance() as? 
DagBundle + ?: error("$className does not implement ${DagBundle::class.qualifiedName}") + val dags = instance.getDags() + + val outputFile = File(outputPath) + outputFile.parentFile.mkdirs() + outputFile.writeText(toYaml(dags)) + } + + internal fun toYaml(dags: List): String = + buildString { + appendLine("dags:") + for (dag in dags) { + appendLine(" ${dag.dagId}:") + appendLine(" tasks:") + for (taskId in dag.tasks.keys) { + appendLine(" - $taskId") + } + } + } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt new file mode 100644 index 0000000000000..32fc591f7003c --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt @@ -0,0 +1,117 @@ +package org.apache.airflow.sdk + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory +import org.apache.airflow.sdk.execution.containsJars +import org.apache.airflow.sdk.execution.isJarFile +import org.apache.airflow.sdk.execution.jarFiles +import java.io.File +import java.nio.file.Files +import java.nio.file.Path +import java.util.jar.JarFile + +const val METADATA_MANIFEST_KEY = "Airflow-Java-SDK-Metadata" + +private val yamlMapper = ObjectMapper(YAMLFactory()) + +/** + * A fully resolved bundle: everything needed to start the bundle process. + */ +data class ResolvedBundle( + val mainClass: String, + val classpath: String, +) + +/** + * Scans [bundlesDir] for Java DAG bundles by checking JAR manifests for the + * [METADATA_MANIFEST_KEY] attribute and reading the referenced YAML metadata. + * + * Supports two layouts: + * - **Nested**: each immediate subdirectory of [bundlesDir] is a bundle home. + * - **Flat**: [bundlesDir] itself contains the bundle JARs. + * + * Returns a mapping from dag_id to a [ResolvedBundle] with mainClass and classpath. 
+ */ +fun scanBundles(bundlesDir: Path): Map { + if (!Files.isDirectory(bundlesDir)) return emptyMap() + val result = mutableMapOf() + + // Check each immediate subdirectory as a potential bundle home. + Files.list(bundlesDir).use { paths -> + paths.filter { Files.isDirectory(it) }.forEach { candidate -> + collectBundleDags(candidate, result) + } + } + + // Also check bundlesDir itself (flat layout). + collectBundleDags(bundlesDir, result) + + return result +} + +private fun collectBundleDags( + candidate: Path, + result: MutableMap, +) { + val bundleHome = normalizeBundleHome(candidate) + val resolved = resolveBundle(bundleHome) ?: return + for (dagId in resolved.first) { + result.putIfAbsent(dagId, resolved.second) + } +} + +/** + * Inspects JARs in [bundleHome] for [METADATA_MANIFEST_KEY] and Main-Class. + * Returns (dagIds, ResolvedBundle) or null if no JAR carries the metadata attribute. + */ +private fun resolveBundle(bundleHome: Path): Pair, ResolvedBundle>? { + val jars = jarFiles(bundleHome) + if (jars.isEmpty()) return null + + for (jarPath in jars) { + JarFile(jarPath.toFile()).use { jar -> + val attrs = jar.manifest?.mainAttributes ?: return@use + val metadataFile = attrs.getValue(METADATA_MANIFEST_KEY) ?: return@use + val mainClass = attrs.getValue("Main-Class") ?: return@use + val entry = jar.getJarEntry(metadataFile) ?: return@use + val content = jar.getInputStream(entry).bufferedReader().readText() + val dagIds = parseDagIdsFromYaml(content) + if (dagIds.isEmpty()) return@use + + val classpath = + jars + .map { it.toAbsolutePath().normalize().toString() } + .joinToString(File.pathSeparator) + + return dagIds to ResolvedBundle(mainClass, classpath) + } + } + return null +} + +fun readBundleDagIds(bundleHome: Path): Set { + for (jarPath in jarFiles(bundleHome)) { + JarFile(jarPath.toFile()).use { jar -> + val metadataFile = jar.manifest?.mainAttributes?.getValue(METADATA_MANIFEST_KEY) ?: return@use + val entry = jar.getJarEntry(metadataFile) ?: 
return@use + val content = jar.getInputStream(entry).bufferedReader().readText() + return parseDagIdsFromYaml(content) + } + } + return emptySet() +} + +fun parseDagIdsFromYaml(yaml: String): Set { + val root = yamlMapper.readTree(yaml) + val dagsNode = root.get("dags") ?: return emptySet() + val dagIds = mutableSetOf() + dagsNode.fieldNames().forEachRemaining { dagIds.add(it) } + return dagIds +} + +private fun normalizeBundleHome(path: Path): Path { + val normalized = path.toAbsolutePath().normalize() + if (normalized.isJarFile()) return normalized.parent + val lib = normalized.resolve("lib") + return if (containsJars(lib)) lib else normalized +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt new file mode 100644 index 0000000000000..582496ff0e2e0 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt @@ -0,0 +1,59 @@ +package org.apache.airflow.sdk + +import org.apache.airflow.sdk.execution.Client +import org.apache.airflow.sdk.execution.StartupDetails + +class Client( + val details: StartupDetails, + val impl: Client, +) { + companion object { + const val XCOM_RETURN_KEY = "return_value" + } + + fun getConnection(id: String): Connection = + with(impl.getConnection(id)) { + Connection( + id = connId, + type = connType, + host = host, + schema = schema, + login = login, + password = password, + port = port, + extra = extra, + ) + } + + fun getVariable(key: String): Any? = impl.getVariable(key).value + + @JvmOverloads fun getXCom( + key: String = XCOM_RETURN_KEY, + dagId: String = details.ti.dagId, + taskId: String, + runId: String = details.ti.runId, + mapIndex: Int? = null, + includePriorDates: Boolean = false, + ): Any? 
= + impl + .getXCom( + key = key, + dagId = dagId, + taskId = taskId, + runId = runId, + mapIndex = mapIndex, + includePriorDates = includePriorDates, + ).value + + @JvmOverloads fun setXCom( + key: String = XCOM_RETURN_KEY, + value: Any, + ) = impl.setXCom( + key = key, + value = value, + dagId = details.ti.dagId, + taskId = details.ti.taskId, + runId = details.ti.runId, + mapIndex = details.ti.mapIndex ?: -1, + ) +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt new file mode 100644 index 0000000000000..1a6c69b2b901f --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt @@ -0,0 +1,136 @@ +package org.apache.airflow.sdk + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.Paths + +private const val CONFIG_FILE_NAME = "java-sdk.yaml" + +open class WorkerError( + message: String, +) : IllegalStateException(message) + +class NoBody : WorkerError("No body") + +/** + * SDK configuration resolved from environment variables and an optional YAML config file. + * + * Resolution order (highest priority first): + * 1. Environment variable `AIRFLOW__
<SECTION>__<KEY>` (uppercase, double-underscore delimited) + * 2. YAML config file value at `<section>.<key>` (lowercase) + * 3. Default value (where applicable) + * + * Only the canonical `AIRFLOW__<SECTION>
__` env var form is recognised. + * Single-underscore variants (`AIRFLOW_SECTION_KEY`) are **not** supported — use the + * YAML file for a more readable alternative. + * + * The YAML file is loaded from `$AIRFLOW_HOME/java-sdk.yaml` when present. + * + * ```yaml + * core: + * execution_api_server_url: "http://localhost:8080/execution/" + * + * sdk: + * bundles_dir: "./bin" + * + * api_auth: + * jwt_secret: "your-secret-key" + * jwt_issuer: "airflow" + * jwt_expiration_time: 30 + * ``` + * + * Each YAML key corresponds directly to the env-var option name: + * `core.execution_api_server_url` ↔ `AIRFLOW__CORE__EXECUTION_API_SERVER_URL`. + */ +class SdkConfig( + private val env: Map = System.getenv(), + yamlOverride: Path? = null, +) { + @Suppress("UNCHECKED_CAST") + private val yaml: Map> = + run { + val path = yamlOverride ?: resolveConfigPath(env) + if (path != null && Files.isRegularFile(path)) { + val raw = ObjectMapper(YAMLFactory()).readValue(path.toFile(), Map::class.java) as? Map ?: emptyMap() + raw.entries.associate { (k, v) -> + k to ((v as? Map<*, *>)?.entries?.associate { (ik, iv) -> ik.toString() to iv } ?: emptyMap()) + } + } else { + emptyMap() + } + } + + /** + * Look up a config value by section and key. + * Checks `AIRFLOW__
<SECTION>__<KEY>` env var first, then YAML `<section>
.`. + */ + fun get( + section: String, + key: String, + ): String? { + val envKey = "AIRFLOW__${section.uppercase()}__${key.uppercase()}" + env[envKey]?.takeIf { it.isNotBlank() }?.let { return it } + return yaml[section]?.get(key)?.toString()?.takeIf { it.isNotBlank() } + } + + /** Like [get] but throws [WorkerError] when the value is missing. */ + fun require( + section: String, + key: String, + ): String = + get(section, key) + ?: throw WorkerError( + "$section.$key must be configured " + + "(AIRFLOW__${section.uppercase()}__${key.uppercase()} or $CONFIG_FILE_NAME)", + ) + + /** Resolve a positive long, falling back to [default]. */ + fun getPositiveLong( + section: String, + key: String, + default: Long, + ): Long { + val raw = get(section, key) ?: return default + val parsed = + raw.toLongOrNull() + ?: throw WorkerError("$section.$key must be an integer") + if (parsed <= 0) throw WorkerError("$section.$key must be greater than 0") + return parsed + } + + // -- Execution API -- + + val executionApiUrl: String + get() { + val url = + get("core", "execution_api_server_url") + ?: get("execution", "api_url") + return url?.ensureTrailingSlash() + ?: throw WorkerError( + "core.execution_api_server_url must be configured " + + "(AIRFLOW__CORE__EXECUTION_API_SERVER_URL or $CONFIG_FILE_NAME)", + ) + } + + // -- JWT -- + + val jwtSecret: String get() = require("api_auth", "jwt_secret") + val jwtIssuer: String? get() = get("api_auth", "jwt_issuer") + val jwtExpirationTime: Long get() = getPositiveLong("api_auth", "jwt_expiration_time", 30) + + // -- Bundle resolution -- + + val bundlesDir: Path? + get() = get("sdk", "bundles_dir")?.let(Paths::get) + + companion object { + private fun resolveConfigPath(env: Map): Path? 
{ + val home = env["AIRFLOW_HOME"]?.takeIf { it.isNotBlank() } ?: return null + return Paths.get(home, CONFIG_FILE_NAME) + } + } +} + +internal fun String.ensureTrailingSlash() = if (endsWith('/')) this else "$this/" diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt new file mode 100644 index 0000000000000..98a7dab39bded --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt @@ -0,0 +1,12 @@ +package org.apache.airflow.sdk + +data class Connection( + val id: String, + val type: String?, + val host: String?, + val schema: String?, + val login: String?, + val password: String?, + val port: Int?, + val extra: String?, +) diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt new file mode 100644 index 0000000000000..0df3f1251f105 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt @@ -0,0 +1,86 @@ +package org.apache.airflow.sdk + +import java.time.Duration +import java.time.Instant + +/** + * A Dag (Directed Acyclic Graph) is a collection of tasks with directional dependencies. + * + * A Dag has a schedule, a start date and an end date (optional). For each schedule + * (say daily or hourly), the Dag needs to run each individual task as their + * dependencies are met. + * + * @param dagId The id of the Dag; must consist exclusively of alphanumeric characters, + * dashes, dots and underscores (all ASCII). + * @param description The description for the Dag to e.g. be shown on the webserver. + * @param schedule Defines when Dag runs are scheduled. Can be a cron expression string + * (e.g. "0 0 * * *"), a preset (e.g. "@daily", "@hourly", "@once", "@continuous"), + * or null for no schedule. + * @param startDate The timestamp from which the scheduler will attempt to backfill. 
+ * @param endDate A date beyond which your Dag won't run; leave null for open-ended scheduling. + * @param defaultArgs A map of default parameters to be used as constructor keyword + * parameters when initialising operators. + * @param maxActiveTasks The number of task instances allowed to run concurrently per Dag run. + * @param maxActiveRuns Maximum number of active Dag runs. + * @param maxConsecutiveFailedDagRuns Maximum number of consecutive failed Dag runs, + * beyond this the scheduler will disable the Dag. + * @param dagrunTimeout Duration a DagRun is allowed to run before it times out or fails. + * @param catchup Perform scheduler catchup (or only run latest)? Defaults to false. + * @param docMd Documentation in markdown format. + * @param accessControl Optional Dag-level access control actions, e.g. + * mapOf("role1" to mapOf("DAGs" to setOf("can_read", "can_edit"))). + * @param isPausedUponCreation Whether the Dag is paused when created for the first time. + * @param tags Set of tags to help filtering Dags in the UI. + * @param ownerLinks Map of owners and their links, clickable on the Dags view UI. + * @param failFast Fails currently running tasks when a task in Dag fails. + * @param dagDisplayName The display name of the Dag on the UI. Defaults to dagId. + * @param renderTemplateAsNativeObj If true, uses native rendering for templates. + * @param params A map of Dag-level parameters accessible in templates, namespaced under + * "params". These can be overridden at the task level. + */ +class Dag + @JvmOverloads + constructor( + val dagId: String, + val description: String? = null, + val schedule: String? = null, + val startDate: Instant? = null, + val endDate: Instant? = null, + val defaultArgs: Map = emptyMap(), + val maxActiveTasks: Int = DEFAULT_MAX_ACTIVE_TASKS, + val maxActiveRuns: Int = DEFAULT_MAX_ACTIVE_RUNS, + val maxConsecutiveFailedDagRuns: Int = DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS, + val dagrunTimeout: Duration? 
= null, + val catchup: Boolean = false, + val docMd: String? = null, + val accessControl: Map>>? = null, + val isPausedUponCreation: Boolean? = null, + val tags: Set = emptySet(), + val ownerLinks: Map = emptyMap(), + val failFast: Boolean = false, + val dagDisplayName: String? = null, + val renderTemplateAsNativeObj: Boolean = false, + val params: Map? = null, + ) { + internal var tasks = mutableMapOf>() + internal var dependants = mutableMapOf>() + + @JvmOverloads + fun addTask( + id: String, + definition: Class, + dependsOn: List = emptyList(), + ) { + // TODO: Check duplicate key. + tasks[id] = definition + for (parent in dependsOn) { + dependants.getOrPut(parent) { mutableSetOf() }.add(id) + } + } + + companion object { + const val DEFAULT_MAX_ACTIVE_TASKS = 16 + const val DEFAULT_MAX_ACTIVE_RUNS = 16 + const val DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS = 0 + } + } diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt new file mode 100644 index 0000000000000..9890bd20b6076 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt @@ -0,0 +1,12 @@ +package org.apache.airflow.sdk + +/** + * Interface for declaring DAGs in a bundle. + * + *

Implement this interface in the class specified as {@code Main-Class} in your JAR manifest. + * The build system instantiates this class at compile time to extract dag_ids and task_ids + * into the JAR manifest, enabling inspection of bundled DAGs without running the full process. + */ +interface DagBundle { + fun getDags(): List +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt new file mode 100644 index 0000000000000..bc72bfa3296cf --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt @@ -0,0 +1,79 @@ +package org.apache.airflow.sdk + +import com.xenomachina.argparser.ArgParser +import io.ktor.network.selector.SelectorManager +import io.ktor.network.sockets.InetSocketAddress +import io.ktor.network.sockets.aSocket +import io.ktor.network.sockets.openReadChannel +import io.ktor.network.sockets.openWriteChannel +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.launch +import kotlinx.coroutines.runBlocking +import org.apache.airflow.sdk.execution.CoordinatorComm +import org.apache.airflow.sdk.execution.LogSender +import org.apache.airflow.sdk.execution.Logger +import kotlin.text.substringAfterLast +import kotlin.text.substringBeforeLast + +private class Args( + parser: ArgParser, +) { + private fun parseAddress(address: String): InetSocketAddress = + InetSocketAddress( + address.substringBeforeLast(':'), + address.substringAfterLast(':').toInt(), + ) + + val comm by parser.storing("--comm", help = "Address (host:port) to communicate with parent") { + parseAddress(this) + } + val logs by parser.storing("--logs", help = "Address (host:port) to send Airflow logs to") { + parseAddress(this) + } +} + +class ApiError( + message: String, +) : IllegalStateException(message) + +class Server( + private val comm: InetSocketAddress, + private val logs: InetSocketAddress, +) { 
+ companion object { + @JvmStatic + fun create(args: Array): Server { + val args = ArgParser(args).parseInto(::Args) + return Server(args.comm, args.logs) + } + } + + private val logger = Logger(Server::class) + + fun serve(bundle: Bundle) { + runBlocking { + launch { + awaitAll( + async { + aSocket(SelectorManager(Dispatchers.IO)).tcp().connect(comm).use { socket -> + logger.debug("Connected comm", mapOf("addr" to comm)) + CoordinatorComm( + bundle, + socket.openReadChannel(), + socket.openWriteChannel(autoFlush = true), + ).startProcessing() + } + }, + async { + aSocket(SelectorManager(Dispatchers.IO)).tcp().connect(logs).use { socket -> + logger.debug("Connected logs", mapOf("addr" to logs)) + LogSender.configure(socket.openWriteChannel(autoFlush = true)) + } + }, + ) + } + } + } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt new file mode 100644 index 0000000000000..08ca52e2ae5c1 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt @@ -0,0 +1,8 @@ +package org.apache.airflow.sdk + +import kotlin.Throws + +interface Task { + @Throws(Exception::class) + fun execute(client: Client) +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt new file mode 100644 index 0000000000000..7388b308e6d3c --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt @@ -0,0 +1,143 @@ +package org.apache.airflow.sdk.execution + +import kotlinx.coroutines.runBlocking +import org.apache.airflow.sdk.execution.api.client.ApiClient +import org.apache.airflow.sdk.execution.api.model.ConnectionResponse +import org.apache.airflow.sdk.execution.api.model.VariableResponse +import org.apache.airflow.sdk.execution.api.model.XComResponse +import org.apache.airflow.sdk.execution.api.route.ConnectionsApi +import 
org.apache.airflow.sdk.execution.api.route.VariablesApi +import org.apache.airflow.sdk.execution.api.route.XComsApi +import java.time.LocalDate + +interface Client { + fun getConnection(id: String): ConnectionResponse + + fun getVariable(key: String): VariableResponse + + fun getXCom( + key: String, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int? = null, + includePriorDates: Boolean = false, + ): XComResponse + + fun setXCom( + key: String, + value: Any, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int, + ) +} + +class CoordinatorClient( + val exec: CoordinatorComm, +) : Client { + override fun getConnection(id: String) = runBlocking { exec.communicate(GetConnection(id)) } + + override fun getVariable(key: String) = runBlocking { exec.communicate(GetVariable(key)) } + + override fun setXCom( + key: String, + value: Any, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int, + ) { + val message = + SetXCom( + key = key, + value = value, + dagId = dagId, + taskId = taskId, + runId = runId, + mapIndex = mapIndex, + ) + runBlocking { exec.communicate(message) } + } + + override fun getXCom( + key: String, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int?, + includePriorDates: Boolean, + ): XComResponse { + val message = + GetXCom( + key = key, + dagId = dagId, + taskId = taskId, + runId = runId, + mapIndex = mapIndex, + includePriorDates = includePriorDates, + ) + return runBlocking { exec.communicate(message) } + } +} + +class HttpExecApiClient( + val http: ApiClient, +) : Client { + companion object { + val version: LocalDate = LocalDate.parse(AIRFLOW_EXEC_API_VERSION) + } + + override fun getConnection(id: String) = + http.communicate { + getConnection(id, version) + } + + override fun getVariable(key: String) = + http.communicate { + getVariable(key, version) + } + + override fun getXCom( + key: String, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int?, + includePriorDates: 
Boolean, + ) = http.communicate { + getXcom( + dagId, + runId, + taskId, + key, + mapIndex, + includePriorDates, + 0, + version, + ) + } + + override fun setXCom( + key: String, + value: Any, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int, + ) { + http.communicate { + setXcom( + dagId, + runId, + taskId, + key, + mapIndex, + null, + version, + value, + ) + } + } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt new file mode 100644 index 0000000000000..acc0e5bb4154f --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt @@ -0,0 +1,240 @@ +package org.apache.airflow.sdk.execution + +import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.annotation.JsonPropertyOrder +import io.ktor.utils.io.ByteReadChannel +import io.ktor.utils.io.ByteWriteChannel +import io.ktor.utils.io.readByteArray +import io.ktor.utils.io.writeByteArray +import org.apache.airflow.sdk.ApiError +import org.apache.airflow.sdk.Bundle +import org.apache.airflow.sdk.execution.api.client.ApiClient +import org.apache.airflow.sdk.execution.api.model.AssetProfile +import org.apache.airflow.sdk.execution.api.model.BundleInfo +import org.apache.airflow.sdk.execution.api.model.TIRunContext +import org.apache.airflow.sdk.execution.api.model.TISuccessStatePayload +import org.apache.airflow.sdk.execution.api.model.TaskInstance +import retrofit2.Call +import java.time.OffsetDateTime +import kotlin.concurrent.atomics.AtomicInt +import kotlin.concurrent.atomics.ExperimentalAtomicApi +import kotlin.system.exitProcess + +data class IncomingFrame( + val id: Int, + val body: Any?, +) + +data class OutgoingFrame( + val id: Int, + val body: Any, +) + +class ErrorResponse { + @JsonProperty("error") + var error: String = "" // TODO: Use enum. + + @JsonProperty("detail") + var detail: Any? 
= null +} + +class DagFileParseRequest { + var file: String = "" + + @JsonProperty("bundle_path") + var bundlePath: String = "" +} + +class StartupDetails { + @JsonProperty("ti") + lateinit var ti: TaskInstance + + @JsonProperty("dag_rel_path") + var dagRelPath: String = "" + + @JsonProperty("bundle_info") + lateinit var bundleInfo: BundleInfo + + @JsonProperty("start_date") + lateinit var startDate: OffsetDateTime + + @JsonProperty("ti_context") + lateinit var tiContext: TIRunContext + + @JsonProperty("sentry_integration") + var sentryIntegration: String = "" +} + +class SucceedTask : TISuccessStatePayload { + constructor( + endDate: OffsetDateTime = OffsetDateTime.now(), + taskOutlets: List = emptyList(), + outletEvents: List> = emptyList(), + ) { + endDate(endDate) + taskOutlets(taskOutlets) + outletEvents(outletEvents) + } + + val type = "SucceedTask" +} + +@JsonPropertyOrder(value = ["state", "end_date", "type"]) +data class TaskState( + val state: String, // TODO: Use enum (failed, removed, skipped) and custom serialization. + @get:JsonProperty("end_date") val endDate: OffsetDateTime = OffsetDateTime.now(), +) { + val type = "TaskState" +} + +data class GetConnection( + @get:JsonProperty("conn_id") val id: String, +) { + val type = "GetConnection" +} + +data class GetVariable( + val key: String, +) { + val type = "GetVariable" +} + +data class GetXCom( + val key: String, + @get:JsonProperty("dag_id") val dagId: String, + @get:JsonProperty("task_id") val taskId: String, + @get:JsonProperty("run_id") val runId: String, + @get:JsonProperty("map_index") val mapIndex: Int? 
= null, + @get:JsonProperty("include_prior_dates") val includePriorDates: Boolean = false, +) { + val type = "GetXCom" +} + +data class SetXCom( + val key: String, + val value: Any, + @get:JsonProperty("dag_id") val dagId: String, + @get:JsonProperty("task_id") val taskId: String, + @get:JsonProperty("run_id") val runId: String, + @get:JsonProperty("map_index") val mapIndex: Int, + @get:JsonProperty("mapped_length") val mappedLength: Int? = null, +) { + val type = "SetXCom" +} + +@OptIn(ExperimentalAtomicApi::class) +class CoordinatorComm( + private val bundle: Bundle, + private val reader: ByteReadChannel, + private val writer: ByteWriteChannel, +) { + internal companion object { + private val logger = Logger(CoordinatorComm::class) + + fun encode(outgoing: OutgoingFrame): ByteArray { + val body = + when (val message = outgoing.body) { + is DagParsingResult -> message.serialize() + else -> message + } + return TaskSdkFrames.encodeRequest(outgoing.id, body) + } + + fun decode(bytes: ByteArray): IncomingFrame = TaskSdkFrames.decode(bytes, TaskSdkFrames.toBundleProcessTypes) + } + + private val nextId = AtomicInt(0) + private var shutDownRequested = false + + suspend fun startProcessing() { + while (!shutDownRequested) { + processOnce(::handleIncoming) + } + logger.debug("Goodbye") + } + + private suspend fun processOnce(handle: suspend (IncomingFrame) -> Unit) { + val prefix = reader.readByteArray(4) // First 4 bytes as length. + if (prefix.size != 4) { // Something is terribly wrong. Let's bail. + logger.error("Need 4 prefix bytes", mapOf("actual" to prefix.size)) + shutDownRequested = true + return + } + + val payloadLength = TaskSdkFrames.parseLengthPrefix(prefix) + val payload = reader.readByteArray(payloadLength) + if (payload.size != payloadLength) { // Something is terribly wrong. Let's bail. 
+ logger.error( + "Payload length not right", + mapOf("expect" to payloadLength, "receive" to payload.size), + ) + shutDownRequested = true + return + } + val frame = decode(payload) + logger.debug("Handling", mapOf("id" to frame.id)) + handle(frame) + } + + private suspend fun sendMessage( + id: Int, + body: Any, + ) { + val data = encode(OutgoingFrame(id, body)) + logger.debug("Sending", mapOf("id" to id, "body" to body)) + writer.writeByteArray(TaskSdkFrames.lengthPrefix(data.size)) + writer.writeByteArray(data) + } + + suspend fun handleIncoming(frame: IncomingFrame) { + when (val request = frame.body) { + null -> {} + is ErrorResponse -> { + println("Error!! id=${frame.id} [${request.error}] ${request.detail}") // TODO: Handle error. + exitProcess(1) + } + is DagFileParseRequest -> { + val body = DagParser(request.file, request.bundlePath).parse(bundle) + sendMessage(frame.id, body) + shutDownRequested = true + } + is StartupDetails -> { + sendMessage(frame.id, TaskRunner.run(bundle, request, this)) + shutDownRequested = true + } + } + } + + @Throws(ApiError::class) + suspend fun communicateImpl(body: Any): Any { + var frame: IncomingFrame? 
= null + + suspend fun handle(f: IncomingFrame) { + frame = f + } + sendMessage(nextId.fetchAndAdd(1), body) + processOnce(::handle) + if (frame == null) { + throw ApiError("No response received") + } + return frame.body ?: Unit + } + + @Throws(ApiError::class) + suspend inline fun communicate(request: Any): T { + when (val response = communicateImpl(request)) { + is ErrorResponse -> throw ApiError("[${response.error}] ${response.detail}") + is T -> return response + else -> throw ApiError("Unexpected response type ${response::class.java}") + } + } +} + +internal inline fun ApiClient.communicate(block: S.() -> Call): R { + val service = createService(S::class.java) + val response = block(service).execute() + if (!response.isSuccessful) { + throw ApiError("[${response.message()}] $response (from $service") + } + return response.body() ?: throw ApiError("No body") +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt new file mode 100644 index 0000000000000..4cba6fd775365 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt @@ -0,0 +1,17 @@ +package org.apache.airflow.sdk.execution + +import org.apache.airflow.sdk.Bundle +import org.apache.airflow.sdk.Dag + +data class DagParsingResult( + val fileloc: String, + val bundlePath: String, + val dags: Map, +) + +class DagParser( + val file: String, + val bundlePath: String, +) { + fun parse(bundle: Bundle): DagParsingResult = DagParsingResult(file, bundlePath, bundle.dags) +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt new file mode 100644 index 0000000000000..1b62162ab0ee3 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt @@ -0,0 +1,25 @@ +package org.apache.airflow.sdk.execution + +import 
java.nio.file.Files +import java.nio.file.Path + +/** True when [this] points to a regular file whose name ends with `.jar`. */ +fun Path.isJarFile(): Boolean = Files.isRegularFile(this) && fileName.toString().endsWith(".jar") + +/** Lists JAR files in [directory], sorted by path name. */ +fun jarFiles(directory: Path): List { + if (!Files.isDirectory(directory)) return emptyList() + val jars = mutableListOf() + Files.list(directory).use { paths -> + paths + .filter { it.isJarFile() } + .sorted() + .forEach { jars.add(it) } + } + return jars +} + +/** True when [directory] contains at least one JAR file. */ +fun containsJars(directory: Path): Boolean = + Files.isDirectory(directory) && + Files.list(directory).use { paths -> paths.anyMatch { it.isJarFile() } } diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt new file mode 100644 index 0000000000000..1378531673dbb --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt @@ -0,0 +1,108 @@ +package org.apache.airflow.sdk.execution + +import io.ktor.utils.io.ByteWriteChannel +import io.ktor.utils.io.writeString +import kotlinx.coroutines.runBlocking +import kotlinx.datetime.LocalDateTime +import kotlinx.datetime.TimeZone +import kotlinx.datetime.toLocalDateTime +import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonNull +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.buildJsonArray +import kotlinx.serialization.json.buildJsonObject +import kotlin.reflect.KClass +import kotlin.time.Clock + +enum class Level { ERROR, DEBUG, } + +internal data class LogMessage( + val event: String, + val arguments: Map, + val logger: Logger, + val level: Level, + val timestamp: LocalDateTime = Clock.System.now().toLocalDateTime(TimeZone.currentSystemDefault()), +) + +internal class Logger( + cls: KClass<*>, +) { + val name: String? 
= cls.java.typeName + + // TODO: Actually implement level filtering. + @Suppress("UNUSED_PARAMETER") + fun isEnabledForLevel(level: Level): Boolean = true + + fun debug( + message: String, + arguments: Map = emptyMap(), + ) { + log(Level.DEBUG, message, arguments) + } + + fun error( + message: String, + arguments: Map = emptyMap(), + ) { + log(Level.ERROR, message, arguments) + } + + private fun log( + level: Level, + event: String, + arguments: Map, + ) { + if (!isEnabledForLevel(level)) return + LogSender.send(LogMessage(event, arguments, this, level)) + } +} + +internal object LogSender { + private var writer: ByteWriteChannel? = null + val messages: MutableList = mutableListOf() + + fun configure(channel: ByteWriteChannel) { + writer = channel + if (!channel.isClosedForWrite) { + while (messages.isNotEmpty()) { + sendTo(channel, messages.removeAt(0)) + } + } + } + + fun send(message: LogMessage) { + val channel = writer + if (channel == null || channel.isClosedForWrite) { + messages.add(message) + } else { + sendTo(channel, message) + } + } + + private fun sendTo( + writer: ByteWriteChannel, + message: LogMessage, + ) { + val map = message.arguments.toMutableMap() + map["event"] = message.event + map["level"] = message.level.name.lowercase() + map["logger"] = message.logger.name ?: "(java)" + map["timestamp"] = message.timestamp + // TODO: Can this be done asynchronously instead? + runBlocking { writer.writeString("${map.toJsonElement()}\n") } + } +} + +private fun Any?.toJsonElement(): JsonElement = + when (this) { + is JsonElement -> this + is Map<*, *> -> + buildJsonObject { + forEach { (k, v) -> put(k.toString(), v.toJsonElement()) } + } + is Iterable<*> -> buildJsonArray { forEach { add(it.toJsonElement()) } } + is Number -> JsonPrimitive(this) + is String -> JsonPrimitive(this) + null -> JsonNull + else -> JsonPrimitive(toString()) // Also correctly handles Kotlinx DateTime. 
+ } diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt new file mode 100644 index 0000000000000..f951248bf6952 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt @@ -0,0 +1,125 @@ +package org.apache.airflow.sdk.execution + +import com.fasterxml.jackson.core.JsonParser +import com.fasterxml.jackson.core.JsonToken +import com.fasterxml.jackson.core.util.JacksonFeatureSet +import com.fasterxml.jackson.databind.DeserializationContext +import com.fasterxml.jackson.databind.deser.std.StdDeserializer +import com.fasterxml.jackson.databind.module.SimpleModule +import com.fasterxml.jackson.datatype.jsr310.JavaTimeFeature +import com.fasterxml.jackson.datatype.jsr310.deser.InstantDeserializer +import org.msgpack.core.ExtensionTypeHeader +import org.msgpack.core.MessagePack +import org.msgpack.core.MessagePacker +import org.msgpack.core.MessageUnpacker +import org.msgpack.jackson.dataformat.MessagePackExtensionType +import org.msgpack.value.ArrayValue +import org.msgpack.value.MapValue +import org.msgpack.value.Value +import org.msgpack.value.ValueType +import java.math.BigInteger +import java.time.OffsetDateTime +import java.time.ZoneOffset + +private fun MessagePacker.packByteArray(data: ByteArray) { + packBinaryHeader(data.size) + writePayload(data) // raw bytes after the bin header; packByte would encode each byte as a MessagePack integer +} + +private fun MessagePacker.packMap(data: Map<*, *>) { + packMapHeader(data.size) + data.forEach { (k, v) -> + check(k is String) + packString(k) + packAny(v) + } +} + +private fun MessagePacker.packCollection(data: Collection<*>) { + packArrayHeader(data.size) + data.forEach { packAny(it) } +} + +fun MessagePacker.packAny(data: Any?)
{ + when (data) { + null -> packNil() + is Boolean -> packBoolean(data) + is Byte -> packByte(data) + is Short -> packShort(data) + is Int -> packInt(data) + is Long -> packLong(data) + is BigInteger -> packBigInteger(data) + is Float -> packFloat(data) + is Double -> packDouble(data) + is ByteArray -> packByteArray(data) + is String -> packString(data) + is Map<*, *> -> packMap(data) + is Collection<*> -> packCollection(data) + else -> throw IllegalArgumentException("Unsupported data type: $data") + } +} + +private fun ArrayValue.decodeArray(): List<*> = + mutableListOf().also { + iterator().forEach { v -> it.add(v.decode()) } + } + +private fun MapValue.decodeMap(): Map<*, *> = + mutableMapOf().also { + entrySet().forEach { (k, v) -> it[k.asStringValue().asString()] = v.decode() } + } + +private fun Value.decode(): Any? = + when (valueType) { + ValueType.NIL -> null + ValueType.BOOLEAN -> asBooleanValue().boolean + ValueType.INTEGER -> + with(asIntegerValue()) { + if (isInLongRange) asLong() else asBigInteger() + } + ValueType.FLOAT -> asFloatValue().toDouble() + ValueType.STRING -> asStringValue().asString() + ValueType.BINARY -> asBinaryValue().asByteArray() + ValueType.ARRAY -> asArrayValue().decodeArray() + ValueType.MAP -> asMapValue().decodeMap() + else -> throw IllegalArgumentException("Unsupported data type: $this") + } + +fun MessageUnpacker.unpackAny(): Any? 
= unpackValue().decode() + +class TimestampToJavaOffsetDateTimeModule : SimpleModule() { + companion object { + const val EXT_TYPE: Byte = -1 + } + + class OffsetDateTimeDeserializer : StdDeserializer(OffsetDateTime::class.java) { + val instantDeserializer = + InstantDeserializer.OFFSET_DATE_TIME.withFeatures( + JacksonFeatureSet.fromDefaults(JavaTimeFeature.entries.toTypedArray()), + ) + + override fun deserialize( + p: JsonParser, + ctxt: DeserializationContext, + ): OffsetDateTime { + if (p.currentToken == JsonToken.VALUE_EMBEDDED_OBJECT) { + deserializeMsgPackTimestamp(p)?.let { return it } + } + return instantDeserializer.deserialize(p, ctxt) + } + + private fun deserializeMsgPackTimestamp(p: JsonParser): OffsetDateTime? { + val ext = p.readValueAs(MessagePackExtensionType::class.java) + if (ext.type != EXT_TYPE) { + return null + } + val unpacker = MessagePack.newDefaultUnpacker(ext.data) + val instant = unpacker.unpackTimestamp(ExtensionTypeHeader(EXT_TYPE, ext.data.size)) + return instant.atOffset(ZoneOffset.UTC) + } + } + + init { + addDeserializer(OffsetDateTime::class.java, OffsetDateTimeDeserializer()) + } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt new file mode 100644 index 0000000000000..21a398298662e --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt @@ -0,0 +1,275 @@ +package org.apache.airflow.sdk.execution + +import org.apache.airflow.sdk.Dag +import org.apache.airflow.sdk.Task +import java.nio.file.Path +import java.time.Duration +import java.time.Instant + +/** + * Serialization logic decoupled from user-facing SDK classes. + * + * Produces output compatible with Python Airflow's DagSerialization format (version 3). 
+ */ +typealias Serialized = Map + +private object SerdeScope + +private val logger = Logger(SerdeScope::class) + +// --------------------------------------------------------------------------- +// Value encoding — matches Python's BaseSerialization.serialize +// --------------------------------------------------------------------------- + +/** + * Recursively serialize a value with Airflow's type/var encoding. + * + * Primitives pass through; complex types are wrapped in {"__type": ..., "__var": ...}. + * This matches the Python BaseSerialization.serialize() output exactly: + * - dict -> {"__type": "dict", "__var": {k: serialize(v), ...}} + * - set -> {"__type": "set", "__var": [sorted items]} + * - datetime -> {"__type": "datetime", "__var": epoch_seconds} + * - timedelta -> {"__type": "timedelta", "__var": total_seconds} + */ +internal fun serializeValue(value: Any?): Any? = + when (value) { + null -> null + is String, is Boolean, is Int, is Long, is Float, is Double -> value + is Instant -> + mapOf( + "__type" to "datetime", + "__var" to (value.epochSecond.toDouble() + value.nano.toDouble() / 1_000_000_000.0), + ) + is Duration -> + mapOf( + "__type" to "timedelta", + "__var" to (value.toMillis().toDouble() / 1000.0), + ) + is Map<*, *> -> + mapOf( + "__type" to "dict", + "__var" to value.entries.associate { (k, v) -> k.toString() to serializeValue(v) }, + ) + is Set<*> -> { + val items = value.map { serializeValue(it) } + mapOf( + "__type" to "set", + "__var" to + try { + items.sortedBy { it?.toString() ?: "" } + } catch (_: Exception) { + items + }, + ) + } + is List<*> -> value.map { serializeValue(it) } + else -> value.toString() + } + +/** + * Unwrap a single level of type encoding, extracting the "__var" part. 
+ * + * In Python's serialize_to_json, non-decorated fields are serialized then unwrapped: + * value = cls.serialize(value) + * if isinstance(value, dict) and Encoding.TYPE in value: + * value = value[Encoding.VAR] + */ +private fun unwrapTypeEncoding(value: Any?): Any? = + if (value is Map<*, *> && "__type" in value && "__var" in value) { + value["__var"] + } else { + value + } + +// --------------------------------------------------------------------------- +// Timetable serialization +// --------------------------------------------------------------------------- + +private fun serializeTimetable(schedule: String?): Serialized = + when (schedule) { + null -> + mapOf( + "__type" to "airflow.timetables.simple.NullTimetable", + "__var" to emptyMap(), + ) + "@once" -> + mapOf( + "__type" to "airflow.timetables.simple.OnceTimetable", + "__var" to emptyMap(), + ) + "@continuous" -> + mapOf( + "__type" to "airflow.timetables.simple.ContinuousTimetable", + "__var" to emptyMap(), + ) + else -> + mapOf( + "__type" to "airflow.timetables.trigger.CronTriggerTimetable", + "__var" to + mapOf( + "expression" to schedule, + "timezone" to "UTC", + "interval" to 0.0, + "run_immediately" to false, + ), + ) + } + +// --------------------------------------------------------------------------- +// Task serialization +// --------------------------------------------------------------------------- + +private fun Class.serialize( + id: String, + dependants: Collection?, +): Serialized { + val data = + mutableMapOf( + "task_id" to id, + "task_type" to simpleName, + "_task_module" to name.substringBeforeLast('.'), + "language" to "java", + ) + if (!dependants.isNullOrEmpty()) { + data["downstream_task_ids"] = dependants.sorted() + } + return mapOf("__type" to "operator", "__var" to data) +} + +// --------------------------------------------------------------------------- +// Task group serialization (flat root group from task list) +// 
--------------------------------------------------------------------------- + +private fun serializeTaskGroup(taskIds: Collection): Serialized = + mapOf( + "_group_id" to null, + "group_display_name" to "", + "prefix_group_id" to true, + "tooltip" to "", + "ui_color" to "CornflowerBlue", + "ui_fgcolor" to "#000", + "children" to taskIds.associateWith { listOf("operator", it) }, + "upstream_group_ids" to emptyList(), + "downstream_group_ids" to emptyList(), + "upstream_task_ids" to emptyList(), + "downstream_task_ids" to emptyList(), + ) + +// --------------------------------------------------------------------------- +// Params serialization +// --------------------------------------------------------------------------- + +private fun serializeParams(params: Map): List> = + params.entries.map { (k, v) -> + listOf( + k, + mapOf( + "__class" to "airflow.sdk.definitions.param.Param", + "default" to serializeValue(v), + "description" to null, + "schema" to serializeValue(emptyMap()), + "source" to null, + ), + ) + } + +// --------------------------------------------------------------------------- +// DAG serialization — matches Python's DagSerialization.serialize_dag +// --------------------------------------------------------------------------- + +private fun Dag.serialize( + id: String, + fileloc: String, + relativeFileloc: String, +): Serialized { + val result = + mutableMapOf( + // Required fields (always present) + "dag_id" to id, + "fileloc" to fileloc, + "relative_fileloc" to relativeFileloc, + // Always serialized + "timezone" to "UTC", + "timetable" to serializeTimetable(schedule), + "tasks" to tasks.entries.map { (taskId, task) -> task.serialize(taskId, dependants[taskId]) }, + "dag_dependencies" to emptyList(), + "task_group" to serializeTaskGroup(tasks.keys), + "edge_info" to emptyMap(), + "params" to (params?.let { serializeParams(it) } ?: emptyList()), + "deadline" to null, + "allowed_run_types" to null, + ) + + // Optional fields — only include if 
non-null. + // Non-decorated fields are serialized then unwrapped (matching Python's serialize_to_json). + description?.let { result["description"] = it } + startDate?.let { result["start_date"] = unwrapTypeEncoding(serializeValue(it)) } + endDate?.let { result["end_date"] = unwrapTypeEncoding(serializeValue(it)) } + dagrunTimeout?.let { result["dagrun_timeout"] = unwrapTypeEncoding(serializeValue(it)) } + docMd?.let { result["doc_md"] = it } + isPausedUponCreation?.let { result["is_paused_upon_creation"] = it } + + // Decorated fields (kept with __type/__var encoding, NOT unwrapped) + if (defaultArgs.isNotEmpty()) { + result["default_args"] = serializeValue(defaultArgs) + } + if (accessControl != null) { + // access_control is always serialized when not null, even if empty + result["access_control"] = serializeValue(accessControl) + } + + // Fields excluded when matching schema defaults + if (catchup) result["catchup"] = true + if (failFast) result["fail_fast"] = true + if (maxActiveTasks != Dag.DEFAULT_MAX_ACTIVE_TASKS) result["max_active_tasks"] = maxActiveTasks + if (maxActiveRuns != Dag.DEFAULT_MAX_ACTIVE_RUNS) result["max_active_runs"] = maxActiveRuns + if (maxConsecutiveFailedDagRuns != Dag.DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS) { + result["max_consecutive_failed_dag_runs"] = maxConsecutiveFailedDagRuns + } + if (renderTemplateAsNativeObj) result["render_template_as_native_obj"] = true + + // dag_display_name — excluded when it equals dag_id (the default) + if (dagDisplayName != null && dagDisplayName != id) { + result["dag_display_name"] = dagDisplayName + } + + // Collection fields — serialized then unwrapped; excluded when empty + if (tags.isNotEmpty()) result["tags"] = unwrapTypeEncoding(serializeValue(tags)) + if (ownerLinks.isNotEmpty()) result["owner_links"] = unwrapTypeEncoding(serializeValue(ownerLinks)) + + return result +} + +/** Serialize a single DAG to a dict. Exposed for cross-language validation testing. 
*/ +internal fun serializeDag(dag: Dag): Serialized = dag.serialize(dag.dagId, "", "") + +// --------------------------------------------------------------------------- +// Top-level envelope — matches Python's DagSerialization.to_dict +// --------------------------------------------------------------------------- + +private fun computeRelativeFileloc( + fileloc: String, + bundlePath: String, +): String { + if (fileloc.isEmpty()) return "" + if (bundlePath.isEmpty()) return "." + val rel = Path.of(bundlePath).relativize(Path.of(fileloc)).toString() + return rel.ifEmpty { "." } +} + +internal fun DagParsingResult.serialize(): Serialized { + val relativeFileloc = computeRelativeFileloc(fileloc, bundlePath) + val result = + mapOf( + "type" to "DagFileParsingResult", + "fileloc" to fileloc, + "serialized_dags" to + dags.entries.map { (id, d) -> + mapOf("data" to mapOf("__version" to 3, "dag" to d.serialize(id, fileloc, relativeFileloc))) + }, + ) + + logger.debug("Serialized DAG parsing result", mapOf("result" to result)) + + return result +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt new file mode 100644 index 0000000000000..190eb1706b7e2 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt @@ -0,0 +1,394 @@ +package org.apache.airflow.sdk.execution + +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +import org.apache.airflow.sdk.ensureTrailingSlash +import org.apache.airflow.sdk.execution.api.client.ApiClient +import org.apache.airflow.sdk.execution.api.model.TIEnterRunningPayload +import org.apache.airflow.sdk.execution.api.model.TIRunContext +import org.apache.airflow.sdk.execution.api.model.TISuccessStatePayload +import 
org.apache.airflow.sdk.execution.api.model.TITerminalStatePayload +import org.apache.airflow.sdk.execution.api.model.TerminalStateNonSuccess +import org.apache.airflow.sdk.execution.api.route.TaskInstancesApi +import org.apache.airflow.sdk.execution.api.route.XComsApi +import retrofit2.Call +import java.io.InputStream +import java.io.OutputStream +import java.net.InetAddress +import java.net.ServerSocket +import java.net.Socket +import java.time.LocalDate +import java.time.OffsetDateTime +import java.util.UUID +import org.apache.airflow.sdk.execution.api.model.BundleInfo as ExecutionBundleInfo +import org.apache.airflow.sdk.execution.api.model.TaskInstance as ExecutionTaskInstance +import org.apache.airflow.sdk.execution.api.model.TaskInstanceState as ExecutionTaskInstanceState + +data class SupervisorTaskInstance( + val id: UUID, + val taskId: String, + val dagId: String, + val runId: String, + val tryNumber: Int, + val dagVersionId: UUID, + val mapIndex: Int?, + val contextCarrier: Map? = null, +) + +data class SupervisorBundleInfo( + val name: String, + val version: String?, +) + +data class SupervisorRequest( + val mainClass: String, + val classpath: String, + val executionApiBaseUrl: String, + val token: String, + val workerName: String, + val userName: String, + val dagRelPath: String, + val bundleInfo: SupervisorBundleInfo, + val taskInstance: SupervisorTaskInstance, + val sentryIntegration: String = "", + val onLogLine: suspend (String) -> Unit = {}, +) + +data class SupervisorResult( + val finalState: ExecutionTaskInstanceState, + val exitCode: Int, +) + +/** + * Retrofit interface for reporting task instance terminal state to the Execution API. + * + * Mirrors the Python SDK's `TaskInstanceOperations.succeed()` and `.finish()` methods + * (see `airflow/sdk/api/client.py`), both of which call `PATCH /task-instances/{id}/state` + * with [TISuccessStatePayload] or [TITerminalStatePayload] respectively. 
+ * + * Why not use the generated [TaskInstancesApi.tiUpdateState]? + * The OpenAPI code generator flattens the endpoint's `oneOf` discriminated union into a single + * class [org.apache.airflow.sdk.execution.api.model.TiPatchPayload] whose `StateEnum` only + * contains `UP_FOR_RETRY`. It cannot represent `"success"`, `"failed"`, or `"skipped"`, and its + * method signature does not accept [TISuccessStatePayload] or [TITerminalStatePayload]. + * This interface works around that limitation by binding the same endpoint with the correct + * payload types that the generator *did* produce correctly as standalone classes. + */ +private interface TaskInstanceStateApi { + @retrofit2.http.Headers("Content-Type:application/json") + @retrofit2.http.PATCH("task-instances/{task_instance_id}/state") + fun succeed( + @retrofit2.http.Path("task_instance_id") id: UUID, + @retrofit2.http.Body payload: TISuccessStatePayload, + @retrofit2.http.Header("Airflow-API-Version") version: LocalDate?, + ): Call + + @retrofit2.http.Headers("Content-Type:application/json") + @retrofit2.http.PATCH("task-instances/{task_instance_id}/state") + fun finish( + @retrofit2.http.Path("task_instance_id") id: UUID, + @retrofit2.http.Body payload: TITerminalStatePayload, + @retrofit2.http.Header("Airflow-API-Version") version: LocalDate?, + ): Call +} + +object Supervisor { + private const val CONNECT_TIMEOUT_MS = 15_000 + private val loopback: InetAddress = InetAddress.getByName("127.0.0.1") + + suspend fun run(request: SupervisorRequest): SupervisorResult { + val execApi = executionApiClient(request.executionApiBaseUrl, request.token) + val execClient = HttpExecApiClient(execApi) + val startDate = OffsetDateTime.now() + + return withContext(Dispatchers.IO) { + coroutineScope { + ServerSocket(0, 1, loopback).use { commServer -> + ServerSocket(0, 1, loopback).use { logsServer -> + commServer.soTimeout = CONNECT_TIMEOUT_MS + logsServer.soTimeout = CONNECT_TIMEOUT_MS + + val process = 
startBundleProcess(request.classpath, request.mainClass, commServer.localPort, logsServer.localPort) + val stdoutPump = + launch(Dispatchers.IO) { + streamLines(process.inputStream, request.onLogLine) + } + val stderrPump = + launch(Dispatchers.IO) { + streamLines(process.errorStream, request.onLogLine) + } + try { + val (commSocket, logsSocket) = acceptConnections(commServer, logsServer) + + commSocket.use { comm -> + logsSocket.use { logs -> + val logPump = + launch(Dispatchers.IO) { + streamLines(logs.getInputStream(), request.onLogLine) + } + + val taskInstance = request.taskInstance.toExecutionTaskInstance(request.workerName) + val tiContext = startTask(execApi, taskInstance, startDate, process, request.workerName, request.userName) + + TaskSdkFrames.writeRequest( + comm.getOutputStream(), + 0, + request.toStartupDetails(taskInstance, tiContext, startDate), + ) + + val finalState = serveTaskSdkRequests(comm, execApi, execClient, taskInstance.id) + val exitCode = process.waitFor() + logPump.join() + stdoutPump.join() + stderrPump.join() + + SupervisorResult( + finalState = if (exitCode == 0) finalState else ExecutionTaskInstanceState.FAILED, + exitCode = exitCode, + ) + } + } + } catch (e: Exception) { + process.destroy() + throw e + } + } + } + } + } + } + + internal suspend fun streamLines( + input: InputStream, + onLogLine: suspend (String) -> Unit, + ) { + withContext(Dispatchers.IO) { + input.bufferedReader().useLines { lines -> + for (line in lines) { + onLogLine(line) + } + } + } + } + + private fun serveTaskSdkRequests( + comm: Socket, + execApi: ApiClient, + execClient: HttpExecApiClient, + taskInstanceId: UUID, + ): ExecutionTaskInstanceState { + val input = comm.getInputStream() + val output = comm.getOutputStream() + + while (true) { + val frame = TaskSdkFrames.readFrame(input, TaskSdkFrames.toSupervisorTypes) + when (val message = frame.body ?: return ExecutionTaskInstanceState.FAILED) { + is GetConnection -> + reply(frame.id, output) { + 
execClient.getConnection(message.id) + } + is GetVariable -> + reply(frame.id, output) { + execClient.getVariable(message.key) + } + is GetXCom -> + reply(frame.id, output) { + execClient.getXCom( + key = message.key, + dagId = message.dagId, + taskId = message.taskId, + runId = message.runId, + mapIndex = message.mapIndex, + includePriorDates = message.includePriorDates, + ) + } + is SetXCom -> + reply(frame.id, output) { + setXCom(execApi, message) + null + } + is SucceedTask -> { + succeed(execApi, taskInstanceId, message) + return ExecutionTaskInstanceState.SUCCESS + } + is TaskState -> { + finish(execApi, taskInstanceId, message) + return ExecutionTaskInstanceState.fromValue(message.state) + } + is ErrorResponse -> throw IllegalStateException("[${message.error}] ${message.detail}") + else -> throw IllegalStateException("Unsupported Task SDK message type ${message::class.java.name}") + } + } + } + + private fun succeed( + execApi: ApiClient, + taskInstanceId: UUID, + message: SucceedTask, + ) { + execApi.send { + succeed( + taskInstanceId, + TISuccessStatePayload() + .endDate(message.endDate) + .taskOutlets(message.taskOutlets) + .outletEvents(message.outletEvents), + HttpExecApiClient.version, + ) + } + } + + private fun finish( + execApi: ApiClient, + taskInstanceId: UUID, + message: TaskState, + ) { + execApi.send { + finish( + taskInstanceId, + TITerminalStatePayload() + .state(TerminalStateNonSuccess.fromValue(message.state)) + .endDate(message.endDate), + HttpExecApiClient.version, + ) + } + } + + private fun reply( + requestId: Int, + output: OutputStream, + block: () -> Any?, + ) { + try { + TaskSdkFrames.writeResponse(output, requestId, body = block()) + } catch (e: Exception) { + TaskSdkFrames.writeResponse( + output, + requestId, + error = + ErrorResponse().also { + it.error = "generic_error" + it.detail = mapOf("message" to (e.message ?: e::class.java.name)) + }, + ) + } + } + + private suspend fun acceptConnections( + commServer: ServerSocket, + 
logsServer: ServerSocket, + ): Pair = + coroutineScope { + val comm = async(Dispatchers.IO) { commServer.accept() } + val logs = async(Dispatchers.IO) { logsServer.accept() } + comm.await() to logs.await() + } + + private fun startBundleProcess( + classpath: String, + mainClass: String, + commPort: Int, + logsPort: Int, + ): Process { + val command = + listOf( + "java", + "-classpath", + classpath, + mainClass, + "--comm=${loopback.hostAddress}:$commPort", + "--logs=${loopback.hostAddress}:$logsPort", + ) + return ProcessBuilder(command) + .redirectOutput(ProcessBuilder.Redirect.PIPE) + .redirectError(ProcessBuilder.Redirect.PIPE) + .start() + } + + private fun executionApiClient( + baseUrl: String, + token: String, + ) = ApiClient("JWTBearer").apply { + setBearerToken(token) + adapterBuilder.baseUrl(baseUrl.ensureTrailingSlash()) + } + + private fun setXCom( + execApi: ApiClient, + request: SetXCom, + ) { + execApi.send { + setXcom( + request.dagId, + request.runId, + request.taskId, + request.key, + request.mapIndex, + null, + HttpExecApiClient.version, + request.value, + ) + } + } + + private fun startTask( + api: ApiClient, + taskInstance: ExecutionTaskInstance, + startDate: OffsetDateTime, + process: Process, + workerName: String, + userName: String, + ): TIRunContext = + api.communicate { + tiRun( + taskInstance.id, + TIEnterRunningPayload() + .hostname(workerName) + .unixname(userName) + .pid(process.pid().toInt()) + .startDate(startDate), + HttpExecApiClient.version, + ) + } + + private fun SupervisorTaskInstance.toExecutionTaskInstance(workerName: String) = + ExecutionTaskInstance().also { + it.id = id + it.taskId = taskId + it.dagId = dagId + it.runId = runId + it.tryNumber = tryNumber + it.dagVersionId = dagVersionId + it.mapIndex = mapIndex + it.hostname = workerName + it.contextCarrier = contextCarrier + } + + private fun SupervisorRequest.toStartupDetails( + taskInstance: ExecutionTaskInstance, + tiContext: TIRunContext, + startDate: OffsetDateTime, + 
) = StartupDetails().also { + it.ti = taskInstance + it.dagRelPath = dagRelPath + it.bundleInfo = + ExecutionBundleInfo().also { info -> + info.name = bundleInfo.name + info.version = bundleInfo.version + } + it.tiContext = tiContext + it.startDate = startDate + it.sentryIntegration = sentryIntegration + } +} + +private inline fun ApiClient.send(block: Q.() -> Call<*>) { + val service = createService(Q::class.java) + val response = block(service).execute() + if (!response.isSuccessful) { + throw IllegalStateException("[${response.message()}] $response (from $service)") + } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt new file mode 100644 index 0000000000000..ed61d57149923 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt @@ -0,0 +1,28 @@ +package org.apache.airflow.sdk.execution + +import org.apache.airflow.sdk.Bundle +import org.apache.airflow.sdk.Client + +object TaskRunner { + fun run( + bundle: Bundle, + request: StartupDetails, + comm: CoordinatorComm, + ): Any = run(bundle, request, Client(request, CoordinatorClient(comm))) + + internal fun run( + bundle: Bundle, + request: StartupDetails, + client: Client, + ): Any { + val task = bundle.dags[request.ti.dagId]?.tasks[request.ti.taskId] ?: return TaskState("removed") + val instance = task.getDeclaredConstructor().newInstance() + return try { + instance.execute(client) + SucceedTask() + } catch (e: Exception) { + e.printStackTrace() + TaskState("failed") + } + } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt new file mode 100644 index 0000000000000..76e666dbb23f5 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt @@ -0,0 +1,235 @@ +package 
org.apache.airflow.sdk.execution + +import com.fasterxml.jackson.databind.DeserializationFeature +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.databind.SerializationFeature +import com.fasterxml.jackson.databind.util.StdDateFormat +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule +import org.apache.airflow.sdk.execution.api.model.ConnectionResponse +import org.apache.airflow.sdk.execution.api.model.VariableResponse +import org.apache.airflow.sdk.execution.api.model.XComResponse +import org.msgpack.core.MessagePack +import java.io.ByteArrayOutputStream +import java.io.EOFException +import java.io.InputStream +import java.io.OutputStream + +typealias TaskSdkMessageDecoder = (Map<*, *>) -> Any + +object TaskSdkFrames { + private val mapper = + ObjectMapper().apply { + configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS) + registerModule(JavaTimeModule()) + registerModule(TimestampToJavaOffsetDateTimeModule()) + setDateFormat(StdDateFormat().withColonInTimeZone(true)) + } + + private val inferredTypes = + mapOf( + ConnectionResponse::class to "ConnectionResult", + ErrorResponse::class to "ErrorResponse", + StartupDetails::class to "StartupDetails", + VariableResponse::class to "VariableResult", + XComResponse::class to "XComResult", + ) + + private val toBundleClientTypes: Map = + mapOf( + "ConnectionResult" to mapperDecoder(ConnectionResponse::class.java), + "ErrorResponse" to mapperDecoder(ErrorResponse::class.java), + "VariableResult" to mapperDecoder(VariableResponse::class.java), + "XComResult" to mapperDecoder(XComResponse::class.java), + ) + + val toDagProcessorTypes: Map = + toBundleClientTypes + + mapOf( + "DagFileParseRequest" to mapperDecoder(DagFileParseRequest::class.java), + ) + + val toTaskTypes: Map = + toBundleClientTypes + + mapOf( + "StartupDetails" to mapperDecoder(StartupDetails::class.java), + ) + + // The Java bundle process can 
act as either Python's DagProcessor or Task runtime, so + // its inbound decoder is the union of both message sets. + val toBundleProcessTypes: Map = toDagProcessorTypes + toTaskTypes + + val toSupervisorTypes: Map = + mapOf( + "ErrorResponse" to mapperDecoder(ErrorResponse::class.java), + "GetConnection" to { body -> GetConnection(id = body.string("conn_id")) }, + "GetVariable" to { body -> GetVariable(key = body.string("key")) }, + "GetXCom" to { + GetXCom( + key = it.string("key"), + dagId = it.string("dag_id"), + taskId = it.string("task_id"), + runId = it.string("run_id"), + mapIndex = it.intOrNull("map_index"), + includePriorDates = it.boolean("include_prior_dates", default = false), + ) + }, + "SetXCom" to { + SetXCom( + key = it.string("key"), + value = it["value"] ?: error("Missing 'value'"), + dagId = it.string("dag_id"), + taskId = it.string("task_id"), + runId = it.string("run_id"), + mapIndex = it.int("map_index"), + ) + }, + "SucceedTask" to { SucceedTask() }, + "TaskState" to { body -> TaskState(state = body.string("state")) }, + ) + + fun encodeRequest( + id: Int, + body: Any, + ): ByteArray = encodeFrame(id, body, error = null, isResponse = false) + + fun encodeResponse( + id: Int, + body: Any? = null, + error: ErrorResponse? = null, + ): ByteArray = encodeFrame(id, body, error = error, isResponse = true) + + fun writeRequest( + output: OutputStream, + id: Int, + body: Any, + ) = writeFrame(output, encodeRequest(id, body)) + + fun writeResponse( + output: OutputStream, + id: Int, + body: Any? = null, + error: ErrorResponse? 
= null, + ) = writeFrame(output, encodeResponse(id, body, error)) + + fun decode( + bytes: ByteArray, + bodyTypes: Map, + ): IncomingFrame { + val unpacker = MessagePack.newDefaultUnpacker(bytes) + val headerSize = unpacker.unpackArrayHeader() + check(headerSize >= 2) { "Unexpected Task SDK frame arity $headerSize" } + + val id = unpacker.unpackInt() + val rawBody = unpacker.unpackAny() + val rawError = if (headerSize >= 3) unpacker.unpackAny() else null + unpacker.close() + + val body = + decodeMessage(rawError, bodyTypes = mapOf("ErrorResponse" to mapperDecoder(ErrorResponse::class.java))) + ?: decodeMessage(rawBody, bodyTypes) + + return IncomingFrame(id, body) + } + + fun readFrame( + input: InputStream, + bodyTypes: Map, + ): IncomingFrame = decode(readBytes(input, readLengthPrefix(input)), bodyTypes) + + fun lengthPrefix(length: Int) = + byteArrayOf( + (length shr 24).toByte(), + (length shr 16).toByte(), + (length shr 8).toByte(), + length.toByte(), + ) + + fun readLengthPrefix(input: InputStream): Int = parseLengthPrefix(readBytes(input, 4)) + + fun parseLengthPrefix(prefix: ByteArray): Int { + check(prefix.size == 4) { "Need 4 prefix bytes" } + return prefix.fold(0) { acc, byte -> (acc shl 8) or (byte.toInt() and 0xff) } + } + + fun readBytes( + input: InputStream, + length: Int, + ): ByteArray { + val bytes = input.readNBytes(length) + if (bytes.size != length) { + throw EOFException("Expected $length bytes but only received ${bytes.size}") + } + return bytes + } + + private fun writeFrame( + output: OutputStream, + payload: ByteArray, + ) { + output.write(lengthPrefix(payload.size)) + output.write(payload) + output.flush() + } + + private fun encodeFrame( + id: Int, + body: Any?, + error: ErrorResponse?, + isResponse: Boolean, + ): ByteArray { + val payload = ByteArrayOutputStream() + val packer = MessagePack.newDefaultPacker(payload) + packer.packArrayHeader(if (isResponse) 3 else 2) + packer.packInt(id) + packer.packAny(body?.let(::toBody)) + if 
(isResponse) { + packer.packAny(error?.let(::toBody)) + } + packer.close() + return payload.toByteArray() + } + + private fun decodeMessage( + raw: Any?, + bodyTypes: Map, + ): Any? { + val body = raw as? Map<*, *> ?: return raw + val typeName = body["type"] as? String ?: return body + val decoder = bodyTypes[typeName] ?: error("Unsupported Task SDK message type $typeName") + return decoder(body) + } + + private fun mapperDecoder(targetType: Class<*>): TaskSdkMessageDecoder = { body -> mapper.convertValue(body, targetType) } + + @Suppress("UNCHECKED_CAST") + private fun toBody(value: Any): Map = + when (value) { + is Map<*, *> -> value as Map + else -> + (mapper.convertValue(value, MutableMap::class.java) as MutableMap).also { body -> + inferredTypes[value::class]?.let { typeName -> body.putIfAbsent("type", typeName) } + } + } + + private fun Map<*, *>.string(key: String): String = this[key] as? String ?: error("Missing '$key'") + + private fun Map<*, *>.int(key: String): Int = intOrNull(key) ?: error("Missing integer '$key'") + + private fun Map<*, *>.intOrNull(key: String): Int? 
= + when (val value = this[key]) { + null -> null + is Number -> value.toInt() + else -> error("Expected integer '$key', got ${value::class.java}") + } + + private fun Map<*, *>.boolean( + key: String, + default: Boolean, + ): Boolean = + when (val value = this[key]) { + null -> default + is Boolean -> value + else -> error("Expected boolean '$key', got ${value::class.java}") + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt new file mode 100644 index 0000000000000..8bdacd8ce4e79 --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt @@ -0,0 +1,197 @@ +package org.apache.airflow.sdk + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.io.TempDir +import java.io.File +import java.nio.file.Files +import java.nio.file.Path +import java.util.jar.JarOutputStream +import java.util.jar.Manifest +import java.util.zip.ZipEntry + +private const val STUB_MAIN_CLASS = "com.example.Main" + +class BundleScannerTest { + @Test + @DisplayName("parseDagIdsFromYaml extracts dag ids from metadata YAML") + fun parseDagIdsFromYaml() { + val yaml = + """ + dags: + java_example: + tasks: + - extract + - transform + - load + another_dag: + tasks: + - task_a + """.trimIndent() + + assertEquals(setOf("java_example", "another_dag"), parseDagIdsFromYaml(yaml)) + } + + @Test + @DisplayName("parseDagIdsFromYaml returns empty set for missing dags key") + fun parseDagIdsFromYamlEmpty() { + assertEquals(emptySet(), parseDagIdsFromYaml("other_key: value")) + } + + @Test + @DisplayName("readBundleDagIds reads metadata from JAR with Airflow-Java-SDK-Metadata manifest") + fun readBundleDagIdsFromJar( + @TempDir tempDir: Path, + ) { + 
createBundleJar(tempDir, mapOf("my_dag" to listOf("t1", "t2"))) + + assertEquals(setOf("my_dag"), readBundleDagIds(tempDir)) + } + + @Test + @DisplayName("readBundleDagIds returns empty set when no JAR has metadata") + fun readBundleDagIdsNoMetadata( + @TempDir tempDir: Path, + ) { + val manifest = Manifest() + manifest.mainAttributes.putValue("Manifest-Version", "1.0") + JarOutputStream(Files.newOutputStream(tempDir.resolve("plain.jar")), manifest).use {} + + assertEquals(emptySet(), readBundleDagIds(tempDir)) + } + + @Test + @DisplayName("scanBundles discovers bundles in subdirectories") + fun scanBundlesNestedLayout( + @TempDir tempDir: Path, + ) { + val bundleA = Files.createDirectory(tempDir.resolve("bundle-a")) + createBundleJar(bundleA, mapOf("dag_a" to listOf("t1"))) + + val bundleB = Files.createDirectory(tempDir.resolve("bundle-b")) + createBundleJar(bundleB, mapOf("dag_b" to listOf("t2"), "dag_c" to listOf("t3"))) + + val result = scanBundles(tempDir) + + assertEquals(STUB_MAIN_CLASS, result["dag_a"]?.mainClass) + assertEquals(STUB_MAIN_CLASS, result["dag_b"]?.mainClass) + assertEquals(STUB_MAIN_CLASS, result["dag_c"]?.mainClass) + // dag_a classpath should point to bundle-a JARs, not bundle-b + assertTrue(result["dag_a"]!!.classpath.contains("bundle-a")) + assertTrue(result["dag_b"]!!.classpath.contains("bundle-b")) + } + + @Test + @DisplayName("scanBundles supports flat layout where bundlesDir itself contains JARs") + fun scanBundlesFlatLayout( + @TempDir tempDir: Path, + ) { + createBundleJar(tempDir, mapOf("flat_dag" to listOf("t1"))) + + val result = scanBundles(tempDir) + + assertNotNull(result["flat_dag"]) + assertEquals(STUB_MAIN_CLASS, result["flat_dag"]!!.mainClass) + } + + @Test + @DisplayName("scanBundles finds metadata JAR among many dependency JARs") + fun scanBundlesFlatWithDependencyJars( + @TempDir tempDir: Path, + ) { + // Simulate installDist layout: one bundle JAR with metadata among plain dependency JARs. 
+ val plainManifest = Manifest() + plainManifest.mainAttributes.putValue("Manifest-Version", "1.0") + JarOutputStream(Files.newOutputStream(tempDir.resolve("aaa-dep.jar")), plainManifest).use {} + JarOutputStream(Files.newOutputStream(tempDir.resolve("zzz-dep.jar")), plainManifest).use {} + + // A JAR with no manifest at all. + JarOutputStream(Files.newOutputStream(tempDir.resolve("no-manifest.jar"))).use {} + + createBundleJar(tempDir, mapOf("my_dag" to listOf("t1"))) + + val result = scanBundles(tempDir) + + assertNotNull(result["my_dag"]) + assertEquals(STUB_MAIN_CLASS, result["my_dag"]!!.mainClass) + // All 4 JARs should be on the classpath. + val cpEntries = result["my_dag"]!!.classpath.split(File.pathSeparator) + assertEquals(4, cpEntries.size) + } + + @Test + @DisplayName("scanBundles resolves distZip layout where bundlesDir is the lib directory") + fun scanBundlesDistZipLibDir( + @TempDir tempDir: Path, + ) { + // Simulate: unzip example.zip → example/lib/*.jar, BUNDLES_DIR=.../example/lib + val libDir = Files.createDirectories(tempDir.resolve("example").resolve("lib")) + + // 10 plain dependency JARs + val plainManifest = Manifest() + plainManifest.mainAttributes.putValue("Manifest-Version", "1.0") + for (name in listOf( + "annotations-23.0.0", + "converter-jackson-3.0.0", + "jackson-core-2.21.1", + "jackson-databind-2.21.1", + "kotlin-stdlib-2.3.0", + "kotlinx-coroutines-core-jvm-1.10.2", + "msgpack-core-0.9.11", + "okhttp-4.12.0", + "retrofit-3.0.0", + "sdk", + )) { + JarOutputStream(Files.newOutputStream(libDir.resolve("$name.jar")), plainManifest).use {} + } + + // The bundle JAR with metadata, named "example.jar" (alphabetically after some deps) + createBundleJar(libDir, mapOf("java_example" to listOf("extract", "transform", "load")), "example.jar") + + // bundlesDir points directly at lib/ + val result = scanBundles(libDir) + + assertNotNull(result["java_example"], "java_example should be discovered in flat lib/ layout") +
assertEquals(STUB_MAIN_CLASS, result["java_example"]!!.mainClass) + assertEquals(11, result["java_example"]!!.classpath.split(File.pathSeparator).size) + } + + @Test + @DisplayName("scanBundles returns empty map for nonexistent directory") + fun scanBundlesNonexistentDir() { + assertEquals(emptyMap(), scanBundles(Path.of("/nonexistent/dir"))) + } + + private fun createBundleJar( + dir: Path, + dags: Map>, + fileName: String = "bundle.jar", + ): Path { + val manifest = Manifest() + manifest.mainAttributes.putValue("Manifest-Version", "1.0") + manifest.mainAttributes.putValue("Main-Class", STUB_MAIN_CLASS) + manifest.mainAttributes.putValue(METADATA_MANIFEST_KEY, "airflow-metadata.yaml") + + val jarPath = dir.resolve(fileName) + JarOutputStream(Files.newOutputStream(jarPath), manifest).use { jos -> + jos.putNextEntry(ZipEntry("airflow-metadata.yaml")) + val yaml = + buildString { + appendLine("dags:") + for ((dagId, tasks) in dags) { + appendLine(" $dagId:") + appendLine(" tasks:") + for (task in tasks) { + appendLine(" - $task") + } + } + } + jos.write(yaml.toByteArray()) + jos.closeEntry() + } + return jarPath + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt new file mode 100644 index 0000000000000..785394185e7cd --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt @@ -0,0 +1,28 @@ +package org.apache.airflow.sdk + +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test + +internal class BundleTest { + @Test + @DisplayName("Should index dags by dagId") + fun shouldIndexDagsByDagId() { + val dag = Dag("dag") + + val bundle = Bundle("0", listOf(dag)) + + Assertions.assertEquals(mapOf("dag" to dag), bundle.dags) + } + + @Test + @DisplayName("Should reject duplicate dag ids") + fun shouldRejectDuplicateDagIds() { + val error = + 
Assertions.assertThrows(IllegalArgumentException::class.java) { + Bundle("0", listOf(Dag("dag"), Dag("dag"))) + } + + Assertions.assertEquals("Duplicate dagId in bundle: dag", error.message) + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt new file mode 100644 index 0000000000000..6ccdeef500e42 --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt @@ -0,0 +1,138 @@ +package org.apache.airflow.sdk + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Assertions.assertThrows +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.io.TempDir +import java.nio.file.Files +import java.nio.file.Path + +class ConfigTest { + // -- SdkConfig: env var resolution -- + + @Test + @DisplayName("executionApiUrl uses AIRFLOW__CORE__EXECUTION_API_SERVER_URL env var") + fun executionApiUrlFromEnv() { + val config = SdkConfig(env = mapOf("AIRFLOW__CORE__EXECUTION_API_SERVER_URL" to "http://127.0.0.1:8080/execution")) + + assertEquals("http://127.0.0.1:8080/execution/", config.executionApiUrl) + } + + @Test + @DisplayName("executionApiUrl throws when missing") + fun executionApiUrlThrowsWhenMissing() { + val config = SdkConfig(env = emptyMap()) + + val error = assertThrows(WorkerError::class.java) { config.executionApiUrl } + assertTrue(error.message!!.contains("execution_api_server_url")) + } + + @Test + @DisplayName("executionApiUrl falls back to execution.api_url") + fun executionApiUrlFallback() { + val config = SdkConfig(env = mapOf("AIRFLOW__EXECUTION__API_URL" to "http://127.0.0.1:8080/execution")) + + assertEquals("http://127.0.0.1:8080/execution/", config.executionApiUrl) + } + + @Test + @DisplayName("jwtExpirationTime defaults to 30 seconds") + fun 
jwtExpirationTimeDefault() { + val config = SdkConfig(env = emptyMap()) + + assertEquals(30, config.jwtExpirationTime) + } + + // -- SdkConfig: YAML resolution -- + + @Test + @DisplayName("config values are loaded from YAML file") + fun yamlConfigLoading( + @TempDir tempDir: Path, + ) { + val yamlContent = + """ + core: + execution_api_server_url: "http://yaml-host:8080/execution/" + + sdk: + bundles_dir: "./bundles" + + api_auth: + jwt_secret: "yaml-secret" + jwt_issuer: "yaml-issuer" + jwt_expiration_time: 45 + """.trimIndent() + + val yamlPath = tempDir.resolve("java-sdk.yaml") + Files.writeString(yamlPath, yamlContent) + + val config = SdkConfig(env = emptyMap(), yamlOverride = yamlPath) + + assertEquals("http://yaml-host:8080/execution/", config.executionApiUrl) + assertEquals("yaml-secret", config.jwtSecret) + assertEquals("yaml-issuer", config.jwtIssuer) + assertEquals(45, config.jwtExpirationTime) + assertEquals(Path.of("./bundles"), config.bundlesDir) + } + + @Test + @DisplayName("env vars take precedence over YAML values") + fun envTakesPrecedenceOverYaml( + @TempDir tempDir: Path, + ) { + val yamlContent = + """ + core: + execution_api_server_url: "http://yaml-host:8080/execution/" + api_auth: + jwt_secret: "yaml-secret" + """.trimIndent() + + val yamlPath = tempDir.resolve("java-sdk.yaml") + Files.writeString(yamlPath, yamlContent) + + val config = + SdkConfig( + env = + mapOf( + "AIRFLOW__CORE__EXECUTION_API_SERVER_URL" to "http://env-host:9090/execution/", + "AIRFLOW__API_AUTH__JWT_SECRET" to "env-secret", + ), + yamlOverride = yamlPath, + ) + + assertEquals("http://env-host:9090/execution/", config.executionApiUrl) + assertEquals("env-secret", config.jwtSecret) + } + + @Test + @DisplayName("config works with no YAML file and no env vars for optional values") + fun noYamlFile() { + val config = SdkConfig(env = emptyMap()) + + assertEquals(30, config.jwtExpirationTime) + assertNull(config.bundlesDir) + } + + @Test + @DisplayName("YAML file is resolved 
from AIRFLOW_HOME") + fun yamlFromAirflowHome( + @TempDir tempDir: Path, + ) { + val yamlContent = + """ + api_auth: + jwt_secret: "home-secret" + """.trimIndent() + + Files.writeString(tempDir.resolve("java-sdk.yaml"), yamlContent) + + val config = SdkConfig(env = mapOf("AIRFLOW_HOME" to tempDir.toString())) + + assertEquals("home-secret", config.jwtSecret) + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt new file mode 100644 index 0000000000000..d53f8f09c795d --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt @@ -0,0 +1,113 @@ +package org.apache.airflow.sdk + +import io.ktor.utils.io.ByteChannel +import io.ktor.utils.io.availableForRead +import io.ktor.utils.io.readAvailable +import kotlinx.coroutines.runBlocking +import org.apache.airflow.sdk.execution.CoordinatorComm +import org.apache.airflow.sdk.execution.DagFileParseRequest +import org.apache.airflow.sdk.execution.IncomingFrame +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test +import kotlin.text.split + +fun byteArrayFromHexString(hexString: String): ByteArray = + hexString + .split(' ', '\r', '\n') + .filter { it.isNotEmpty() } + .map { it.toUByte(16).toByte() } + .toByteArray() + +@OptIn(ExperimentalUnsignedTypes::class) +internal class CoordinatorCommTest { + lateinit var comm: CoordinatorComm + lateinit var reader: ByteChannel + lateinit var writer: ByteChannel + + @BeforeEach + fun setUp() { + reader = ByteChannel(autoFlush = true) + writer = ByteChannel(autoFlush = true) + comm = CoordinatorComm(Bundle("0", listOf(Dag("dag"))), reader, writer) + } + + @Test + @DisplayName("handleIncoming should produce parse result") + fun handleIncomingShouldProduceParseResult() { + val frame = IncomingFrame(0, DagFileParseRequest().apply { file 
= ":memory:" }) + + // prefix + DagFileParsingResult payload for a minimal DAG. + + /* prefix + + [ + 0, + { + "type": "DagFileParsingResult", + "fileloc": ":memory:", + "serialized_dags": [ + { + "data": { + "__version": 3, + "dag": { + "dag_id": "dag", + "fileloc": ":memory:", + "relative_fileloc": ".", + "timezone": "UTC", + "timetable": { + "__type": "airflow.timetables.simple.NullTimetable", + "__var": {} + }, + "tasks": [] + } + } + } + ] + } + ] + */ + val expected = + byteArrayFromHexString( + """ + | 00 00 01 e6 + | 92 00 83 a4 74 79 70 65 b4 44 61 67 46 69 6c 65 50 61 72 73 69 6e 67 52 + | 65 73 75 6c 74 a7 66 69 6c 65 6c 6f 63 a8 3a 6d 65 6d 6f 72 79 3a af 73 + | 65 72 69 61 6c 69 7a 65 64 5f 64 61 67 73 91 81 a4 64 61 74 61 82 a9 5f + | 5f 76 65 72 73 69 6f 6e 03 a3 64 61 67 8c a6 64 61 67 5f 69 64 a3 64 61 + | 67 a7 66 69 6c 65 6c 6f 63 a8 3a 6d 65 6d 6f 72 79 3a b0 72 65 6c 61 74 + | 69 76 65 5f 66 69 6c 65 6c 6f 63 a1 2e a8 74 69 6d 65 7a 6f 6e 65 a3 55 + | 54 43 a9 74 69 6d 65 74 61 62 6c 65 82 a6 5f 5f 74 79 70 65 d9 27 61 69 + | 72 66 6c 6f 77 2e 74 69 6d 65 74 61 62 6c 65 73 2e 73 69 6d 70 6c 65 2e + | 4e 75 6c 6c 54 69 6d 65 74 61 62 6c 65 a5 5f 5f 76 61 72 80 a5 74 61 73 + | 6b 73 90 b0 64 61 67 5f 64 65 70 65 6e 64 65 6e 63 69 65 73 90 aa 74 61 + | 73 6b 5f 67 72 6f 75 70 8b a9 5f 67 72 6f 75 70 5f 69 64 c0 b2 67 72 6f + | 75 70 5f 64 69 73 70 6c 61 79 5f 6e 61 6d 65 a0 af 70 72 65 66 69 78 5f + | 67 72 6f 75 70 5f 69 64 c3 a7 74 6f 6f 6c 74 69 70 a0 a8 75 69 5f 63 6f + | 6c 6f 72 ae 43 6f 72 6e 66 6c 6f 77 65 72 42 6c 75 65 aa 75 69 5f 66 67 + | 63 6f 6c 6f 72 a4 23 30 30 30 a8 63 68 69 6c 64 72 65 6e 80 b2 75 70 73 + | 74 72 65 61 6d 5f 67 72 6f 75 70 5f 69 64 73 90 b4 64 6f 77 6e 73 74 72 + | 65 61 6d 5f 67 72 6f 75 70 5f 69 64 73 90 b1 75 70 73 74 72 65 61 6d 5f + | 74 61 73 6b 5f 69 64 73 90 b3 64 6f 77 6e 73 74 72 65 61 6d 5f 74 61 73 + | 6b 5f 69 64 73 90 a9 65 64 67 65 5f 69 6e 66 6f 80 a6 70 61 72 61 6d 73 + | 90 a8 64 65 61 64 6c 69 
6e 65 c0 b1 61 6c 6c 6f 77 65 64 5f 72 75 6e 5f + | 74 79 70 65 73 c0 + """.trimMargin(), + ) + + val buffer = ByteArray(1024) { 0 } // Change ByteArray size if assertTrue below fails. + var count = 0 + runBlocking { + comm.handleIncoming(frame) + if (writer.availableForRead > 0) { + count = writer.readAvailable(buffer) + } + } + Assertions.assertTrue(count < buffer.size, "Please increase buffer size above") + + Assertions.assertEquals(expected.size, count) + + val received = buffer.sliceArray(0.until(count)) + Assertions.assertArrayEquals(expected, received) + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt new file mode 100644 index 0000000000000..fcca3ebf45f2b --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt @@ -0,0 +1,86 @@ +package org.apache.airflow.sdk.execution + +import org.apache.airflow.sdk.byteArrayFromHexString +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test +import java.time.OffsetDateTime +import java.time.ZoneOffset + +class CommsTest { + @Test + @DisplayName("Should decode startup details") + fun shouldDecodeStartupDetails() { + // [2, msg, null] with msg coming from + // https://github.com/astronomer/airflow/blob/f39c8da8/task-sdk/tests/task_sdk/execution_time/test_comms.py#L73-L108 + val data = + """ + 92 02 88 a4 74 79 70 65 ae 53 74 61 72 74 75 70 44 65 74 61 69 6c 73 a2 74 69 86 a2 69 64 d9 24 + 34 64 38 32 38 61 36 32 2d 61 34 31 37 2d 34 39 33 36 2d 61 37 61 36 2d 32 62 33 66 61 62 61 63 + 65 63 61 62 a7 74 61 73 6b 5f 69 64 a1 61 aa 74 72 79 5f 6e 75 6d 62 65 72 01 a6 72 75 6e 5f 69 + 64 a1 62 a6 64 61 67 5f 69 64 a1 63 ae 64 61 67 5f 76 65 72 73 69 6f 6e 5f 69 64 d9 24 34 64 38 + 32 38 61 36 32 2d 61 34 31 37 2d 34 39 33 36 2d 61 37 61 36 2d 32 62 33 66 61 62 61 63 65 63 61 + 62 aa 74 69 5f 63 6f 6e 74 65 
78 74 85 a7 64 61 67 5f 72 75 6e 8c a6 64 61 67 5f 69 64 a1 63 a6 + 72 75 6e 5f 69 64 a1 62 ac 6c 6f 67 69 63 61 6c 5f 64 61 74 65 b4 32 30 32 34 2d 31 32 2d 30 31 + 54 30 31 3a 30 30 3a 30 30 5a b3 64 61 74 61 5f 69 6e 74 65 72 76 61 6c 5f 73 74 61 72 74 b4 32 + 30 32 34 2d 31 32 2d 30 31 54 30 30 3a 30 30 3a 30 30 5a b1 64 61 74 61 5f 69 6e 74 65 72 76 61 + 6c 5f 65 6e 64 b4 32 30 32 34 2d 31 32 2d 30 31 54 30 31 3a 30 30 3a 30 30 5a aa 73 74 61 72 74 + 5f 64 61 74 65 b4 32 30 32 34 2d 31 32 2d 30 31 54 30 31 3a 30 30 3a 30 30 5a a9 72 75 6e 5f 61 + 66 74 65 72 b4 32 30 32 34 2d 31 32 2d 30 31 54 30 31 3a 30 30 3a 30 30 5a a8 65 6e 64 5f 64 61 + 74 65 c0 a8 72 75 6e 5f 74 79 70 65 a6 6d 61 6e 75 61 6c a5 73 74 61 74 65 a7 73 75 63 63 65 73 + 73 a4 63 6f 6e 66 c0 b5 63 6f 6e 73 75 6d 65 64 5f 61 73 73 65 74 5f 65 76 65 6e 74 73 90 a9 6d + 61 78 5f 74 72 69 65 73 00 ac 73 68 6f 75 6c 64 5f 72 65 74 72 79 c2 a9 76 61 72 69 61 62 6c 65 + 73 c0 ab 63 6f 6e 6e 65 63 74 69 6f 6e 73 c0 a4 66 69 6c 65 a9 2f 64 65 76 2f 6e 75 6c 6c aa 73 + 74 61 72 74 5f 64 61 74 65 b4 32 30 32 34 2d 31 32 2d 30 31 54 30 31 3a 30 30 3a 30 30 5a ac 64 + 61 67 5f 72 65 6c 5f 70 61 74 68 a9 2f 64 65 76 2f 6e 75 6c 6c ab 62 75 6e 64 6c 65 5f 69 6e 66 + 6f 82 a4 6e 61 6d 65 a8 61 6e 79 2d 6e 61 6d 65 a7 76 65 72 73 69 6f 6e ab 61 6e 79 2d 76 65 72 + 73 69 6f 6e b2 73 65 6e 74 72 79 5f 69 6e 74 65 67 72 61 74 69 6f 6e a0 c0 + """.trimIndent() + val result = CoordinatorComm.decode(byteArrayFromHexString(data)) + Assertions.assertInstanceOf(IncomingFrame::class.java, result) + Assertions.assertInstanceOf(StartupDetails::class.java, result.body) + } + + @Test + @DisplayName("Should serialize all fields") + fun shouldEncodeSucceedTask() { + val endDate = OffsetDateTime.of(2024, 12, 1, 1, 0, 0, 0, ZoneOffset.UTC) + val bytes = CoordinatorComm.encode(OutgoingFrame(3, SucceedTask(endDate))) + val actual = bytes.toHexString(HexFormat { bytes { byteSeparator = " " } }) + + val expected = + """ + 92 03 86 
a5 73 74 61 74 65 a7 73 75 63 63 65 73 73 a8 65 6e 64 5f 64 61 74 65 b4 32 30 32 34 2d + 31 32 2d 30 31 54 30 31 3a 30 30 3a 30 30 5a ac 74 61 73 6b 5f 6f 75 74 6c 65 74 73 90 ad 6f 75 + 74 6c 65 74 5f 65 76 65 6e 74 73 90 b2 72 65 6e 64 65 72 65 64 5f 6d 61 70 5f 69 6e 64 65 78 c0 + a4 74 79 70 65 ab 53 75 63 63 65 65 64 54 61 73 6b + """.trimIndent().replace('\n', ' ') + + Assertions.assertEquals(expected, actual) + } + + @Test + @DisplayName("Should decode requests to the supervisor") + fun shouldDecodeSupervisorRequest() { + val result = TaskSdkFrames.decode(TaskSdkFrames.encodeRequest(5, GetVariable("demo")), TaskSdkFrames.toSupervisorTypes) + + Assertions.assertEquals(5, result.id) + Assertions.assertEquals(GetVariable("demo"), result.body) + } + + @Test + @DisplayName("Should decode protocol errors from the response error slot") + fun shouldDecodeErrorResponseFromErrorSlot() { + val error = + ErrorResponse().also { + it.error = "generic_error" + it.detail = mapOf("message" to "boom") + } + + val result = CoordinatorComm.decode(TaskSdkFrames.encodeResponse(7, error = error)) + + Assertions.assertEquals(7, result.id) + Assertions.assertInstanceOf(ErrorResponse::class.java, result.body) + Assertions.assertEquals("generic_error", (result.body as ErrorResponse).error) + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt new file mode 100644 index 0000000000000..8781e437fe268 --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt @@ -0,0 +1,28 @@ +package org.apache.airflow.sdk.execution + +import org.apache.airflow.sdk.Bundle +import org.apache.airflow.sdk.Dag +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test + +internal class DagParserTest { + lateinit var parser: DagParser + + 
@BeforeEach + fun setUp() { + parser = DagParser(":memory:", "") + } + + @Test + @DisplayName("Should produce serialized dag") + fun shouldProduceSerializedDag() { + val bundle = Bundle("0", listOf(Dag("dag"))) + val result = parser.parse(bundle) + Assertions.assertEquals( + DagParsingResult(":memory:", "", bundle.dags), + result, + ) + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt new file mode 100644 index 0000000000000..a2a6118777793 --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt @@ -0,0 +1,151 @@ +package org.apache.airflow.sdk.execution + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.databind.SerializationFeature +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory +import org.apache.airflow.sdk.Dag +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.DynamicTest +import org.junit.jupiter.api.TestFactory +import java.io.File +import java.time.Duration +import java.time.Instant + +/** + * Reads test_dags.yaml, constructs Dags from the parameters, serialises each + * one with the Java SDK, and writes the result to serialized_java.json for + * cross-language comparison with the Python output. + * + * Each YAML test-case is turned into a JUnit 5 dynamic test so failures are + * reported individually. 
+ * + * After running: + * python validation/serialization/compare.py \ + * validation/serialization/serialized_python.json \ + * validation/serialization/serialized_java.json + */ +class SerializationCompatibilityTest { + companion object { + private val yamlMapper = ObjectMapper(YAMLFactory()) + private val jsonMapper = + ObjectMapper().apply { + enable(SerializationFeature.INDENT_OUTPUT) + configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true) + } + + /** Resolve a project-relative path that works from any Gradle working dir. */ + private fun projectFile(relative: String): File { + // Gradle may run from the repo root or from sdk/ + var dir = File(System.getProperty("user.dir")) + while (dir.parentFile != null) { + val candidate = File(dir, relative) + if (candidate.exists()) return candidate + dir = dir.parentFile + } + // Fallback: try relative to cwd + return File(relative) + } + } + + // ----------------------------------------------------------------------- + // YAML → Dag construction + // ----------------------------------------------------------------------- + + @Suppress("UNCHECKED_CAST") + private fun constructDag(params: Map): Dag { + val dagId = params["dag_id"] as String + + return Dag( + dagId = dagId, + description = params["description"] as? String, + schedule = params["schedule"] as? String, + startDate = (params["start_date"] as? String)?.let { Instant.parse(it) }, + endDate = (params["end_date"] as? String)?.let { Instant.parse(it) }, + defaultArgs = (params["default_args"] as? Map) ?: emptyMap(), + maxActiveTasks = (params["max_active_tasks"] as? Number)?.toInt() ?: Dag.DEFAULT_MAX_ACTIVE_TASKS, + maxActiveRuns = (params["max_active_runs"] as? Number)?.toInt() ?: Dag.DEFAULT_MAX_ACTIVE_RUNS, + maxConsecutiveFailedDagRuns = + (params["max_consecutive_failed_dag_runs"] as? Number)?.toInt() + ?: Dag.DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS, + dagrunTimeout = (params["dagrun_timeout_seconds"] as? 
Number)?.let { Duration.ofSeconds(it.toLong()) }, + catchup = params["catchup"] as? Boolean ?: false, + docMd = params["doc_md"] as? String, + accessControl = + (params["access_control"] as? Map>)?.mapValues { (_, resources) -> + resources.mapValues { (_, perms) -> + when (perms) { + is List<*> -> perms.filterIsInstance().toSet() + is Set<*> -> perms.filterIsInstance().toSet() + else -> setOf(perms.toString()) + } + } + }, + isPausedUponCreation = params["is_paused_upon_creation"] as? Boolean, + tags = (params["tags"] as? List<*>)?.filterIsInstance()?.toSet() ?: emptySet(), + ownerLinks = (params["owner_links"] as? Map) ?: emptyMap(), + failFast = params["fail_fast"] as? Boolean ?: false, + dagDisplayName = params["dag_display_name"] as? String, + renderTemplateAsNativeObj = params["render_template_as_native_obj"] as? Boolean ?: false, + params = params["params"] as? Map, + ) + } + + // ----------------------------------------------------------------------- + // Dynamic test generation + // ----------------------------------------------------------------------- + + @Suppress("UNCHECKED_CAST") + @TestFactory + fun `serialise all YAML test cases`(): List { + val yamlFile = projectFile("validation/serialization/test_dags.yaml") + if (!yamlFile.exists()) { + return listOf( + DynamicTest.dynamicTest("test_dags.yaml not found — skipping") { + println("WARNING: ${yamlFile.absolutePath} not found, skipping serialisation tests") + }, + ) + } + + val root = yamlMapper.readValue(yamlFile, Map::class.java) as Map + val testCases = root["test_cases"] as List> + + // Accumulate results for JSON output + val allResults = mutableMapOf() + + val tests = + testCases.map { case -> + val name = case["name"] as String + val params = case["params"] as Map + + DynamicTest.dynamicTest(name) { + val dag = constructDag(params) + val serialized = serializeDag(dag) + + // Basic assertions + assertNotNull(serialized["dag_id"], "dag_id must be present") + assertNotNull(serialized["timetable"], 
"timetable must be present") + assertNotNull(serialized["tasks"], "tasks must be present") + assertFalse( + serialized.containsKey("__error"), + "serialisation must not produce an error entry", + ) + + allResults[name] = serialized + } + } + + // After all dynamic tests, write the combined JSON. + // We add a final "meta" test that writes the file. + val writeTest = + DynamicTest.dynamicTest("_write_serialized_java_json") { + val outputDir = projectFile("validation/serialization") + outputDir.mkdirs() + val outputFile = File(outputDir, "serialized_java.json") + jsonMapper.writeValue(outputFile, allResults.toSortedMap()) + println("Wrote ${allResults.size} serialised DAGs -> ${outputFile.absolutePath}") + } + + return tests + writeTest + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt new file mode 100644 index 0000000000000..90593f136eb45 --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt @@ -0,0 +1,110 @@ +package org.apache.airflow.sdk.execution + +import org.apache.airflow.sdk.Bundle +import org.apache.airflow.sdk.Client +import org.apache.airflow.sdk.Dag +import org.apache.airflow.sdk.Task +import org.apache.airflow.sdk.execution.api.model.BundleInfo +import org.apache.airflow.sdk.execution.api.model.TIRunContext +import org.apache.airflow.sdk.execution.api.model.TaskInstance +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test +import java.time.OffsetDateTime +import java.util.UUID + +class TaskRunnerTest { + @Test + @DisplayName("Should execute task and return success") + fun shouldExecuteTaskAndReturnSuccess() { + val result = TaskRunner.run(bundleWith("success", SuccessTask::class.java), startupDetails(taskId = "success"), noOpClient()) + + Assertions.assertInstanceOf(SucceedTask::class.java, result) + } + + @Test + 
@DisplayName("Should return removed when task is missing") + fun shouldReturnRemovedWhenTaskIsMissing() { + val result = TaskRunner.run(bundleWith("other", SuccessTask::class.java), startupDetails(taskId = "missing"), noOpClient()) + + Assertions.assertInstanceOf(TaskState::class.java, result) + Assertions.assertEquals("removed", (result as TaskState).state) + } + + @Test + @DisplayName("Should return failed when task throws") + fun shouldReturnFailedWhenTaskThrows() { + val result = TaskRunner.run(bundleWith("failing", FailingTask::class.java), startupDetails(taskId = "failing"), noOpClient()) + + Assertions.assertInstanceOf(TaskState::class.java, result) + Assertions.assertEquals("failed", (result as TaskState).state) + } + + private fun bundleWith( + taskId: String, + taskClass: Class, + ): Bundle { + val dag = Dag("test_dag") + dag.addTask(taskId, taskClass) + return Bundle("1", listOf(dag)) + } + + private fun startupDetails(taskId: String): StartupDetails = + StartupDetails().also { + it.ti = + TaskInstance().also { taskInstance -> + taskInstance.id = UUID.randomUUID() + taskInstance.taskId = taskId + taskInstance.dagId = "test_dag" + taskInstance.runId = "manual__2026-03-31T00:00:00+00:00" + taskInstance.tryNumber = 1 + taskInstance.dagVersionId = UUID.randomUUID() + } + it.dagRelPath = "/dev/null" + it.bundleInfo = + BundleInfo().also { info -> + info.name = "bundle" + info.version = "1" + } + it.startDate = OffsetDateTime.parse("2026-03-31T00:00:00Z") + it.tiContext = TIRunContext() + it.sentryIntegration = "" + } + + private fun noOpClient() = + Client( + startupDetails(taskId = "unused"), + object : org.apache.airflow.sdk.execution.Client { + override fun getConnection(id: String) = throw UnsupportedOperationException("not used in test") + + override fun getVariable(key: String) = throw UnsupportedOperationException("not used in test") + + override fun getXCom( + key: String, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int?, + 
includePriorDates: Boolean, + ) = throw UnsupportedOperationException("not used in test") + + override fun setXCom( + key: String, + value: Any, + dagId: String, + taskId: String, + runId: String, + mapIndex: Int, + ): Unit = throw UnsupportedOperationException("not used in test") + }, + ) + + class SuccessTask : Task { + override fun execute(client: Client) { + } + } + + class FailingTask : Task { + override fun execute(client: Client): Unit = throw IllegalStateException("boom") + } +} diff --git a/java-sdk/settings.gradle.kts b/java-sdk/settings.gradle.kts new file mode 100644 index 0000000000000..0892437c13d3e --- /dev/null +++ b/java-sdk/settings.gradle.kts @@ -0,0 +1,13 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * The settings file is used to specify which projects to include in your build. + * For more detailed information on multi-project builds, please refer to https://docs.gradle.org/9.2.1/userguide/multi_project_builds.html in the Gradle documentation. + */ + +plugins { + id("org.gradle.toolchains.foojay-resolver-convention").version("0.10.0") +} + +rootProject.name = "airflow-java-sdk" +include("example", "sdk") diff --git a/java-sdk/validation/serialization/compare.py b/java-sdk/validation/serialization/compare.py new file mode 100644 index 0000000000000..464f247852c40 --- /dev/null +++ b/java-sdk/validation/serialization/compare.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Compare serialised DAG output from Python and Java SDKs. + +Usage: + python compare.py serialized_python.json serialized_java.json +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +# Fields that are inherently environment-specific and should be ignored. +IGNORED_FIELDS = frozenset( + { + "fileloc", + "relative_fileloc", + "_processor_dags_folder", + } +) + +# Floating-point tolerance for timestamp / duration comparisons. +FLOAT_TOLERANCE = 1e-6 + + +# --------------------------------------------------------------------------- +# Normalisation +# --------------------------------------------------------------------------- + + +def _normalise(obj, *, _depth: int = 0): + """Recursively normalise an object for comparison.""" + if isinstance(obj, dict): + return { + k: _normalise(v, _depth=_depth + 1) for k, v in sorted(obj.items()) if k not in IGNORED_FIELDS + } + if isinstance(obj, list): + return [_normalise(item, _depth=_depth + 1) for item in obj] + if isinstance(obj, float): + return round(obj, 6) + return obj + + +# --------------------------------------------------------------------------- +# Deep diff +# --------------------------------------------------------------------------- + + +def _deep_diff(python_obj, java_obj, path: str = "") -> list[str]: + """Return a list of human-readable difference descriptions.""" + diffs: list[str] = [] + + if type(python_obj) is not type(java_obj): + # Allow int ↔ float (e.g. 
0 vs 0.0) + if isinstance(python_obj, (int, float)) and isinstance(java_obj, (int, float)): + if abs(float(python_obj) - float(java_obj)) > FLOAT_TOLERANCE: + diffs.append(f"{path}: {python_obj!r} != {java_obj!r}") + return diffs + diffs.append( + f"{path}: type mismatch — Python {type(python_obj).__name__}" + f" vs Java {type(java_obj).__name__}" + f" (py={python_obj!r}, java={java_obj!r})" + ) + return diffs + + if isinstance(python_obj, dict): + all_keys = set(python_obj) | set(java_obj) + for key in sorted(all_keys): + child_path = f"{path}.{key}" if path else key + if key not in java_obj: + diffs.append(f"{child_path}: present in Python but missing in Java") + elif key not in python_obj: + diffs.append(f"{child_path}: present in Java but missing in Python") + else: + diffs.extend(_deep_diff(python_obj[key], java_obj[key], child_path)) + elif isinstance(python_obj, list): + if len(python_obj) != len(java_obj): + diffs.append(f"{path}: list length — Python {len(python_obj)} vs Java {len(java_obj)}") + for i, (p, j) in enumerate(zip(python_obj, java_obj)): + diffs.extend(_deep_diff(p, j, f"{path}[{i}]")) + elif isinstance(python_obj, float): + if abs(python_obj - java_obj) > FLOAT_TOLERANCE: + diffs.append(f"{path}: {python_obj!r} != {java_obj!r}") + elif python_obj != java_obj: + diffs.append(f"{path}: {python_obj!r} != {java_obj!r}") + + return diffs + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> None: + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + python_path = Path(sys.argv[1]) + java_path = Path(sys.argv[2]) + + with open(python_path) as fh: + python_data: dict = json.load(fh) + with open(java_path) as fh: + java_data: dict = json.load(fh) + + all_names = sorted(set(python_data) | set(java_data)) + total = len(all_names) + passed = 0 + failed = 0 + + for name in all_names: + if name not in 
python_data: + print(f"SKIP {name} (missing in Python output)") + continue + if name not in java_data: + print(f"SKIP {name} (missing in Java output)") + continue + + py_dag = python_data[name] + jv_dag = java_data[name] + + # Skip error entries + if isinstance(py_dag, dict) and "__error" in py_dag: + print(f"SKIP {name} (Python error: {py_dag['__error']})") + continue + if isinstance(jv_dag, dict) and "__error" in jv_dag: + print(f"SKIP {name} (Java error: {jv_dag['__error']})") + continue + + py_norm = _normalise(py_dag) + jv_norm = _normalise(jv_dag) + + diffs = _deep_diff(py_norm, jv_norm) + if diffs: + failed += 1 + print(f"FAIL {name}") + for d in diffs: + print(f" {d}") + else: + passed += 1 + print(f"PASS {name}") + + print(f"\n{'=' * 60}") + print(f"Total: {total} | Passed: {passed} | Failed: {failed}") + if failed: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/java-sdk/validation/serialization/serialize_python.py b/java-sdk/validation/serialization/serialize_python.py new file mode 100644 index 0000000000000..7cc6ca00eb3d4 --- /dev/null +++ b/java-sdk/validation/serialization/serialize_python.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +Serialize DAGs using the Python Airflow SDK for cross-language comparison. + +Prerequisites: + - Airflow core and task-sdk installed (pip install -e …) + - PyYAML installed (pip install pyyaml) + +Usage: + python serialize_python.py test_dags.yaml serialized_python.json +""" + +from __future__ import annotations + +import json +import sys +from datetime import datetime, timedelta +from pathlib import Path + +import yaml + +# --------------------------------------------------------------------------- +# YAML params → Python DAG constructor kwargs +# --------------------------------------------------------------------------- + + +def _yaml_params_to_dag_kwargs(params: dict) -> dict: + """Convert language-agnostic YAML params to Python DAG constructor kwargs.""" + kwargs: dict = {} + for key, value in params.items(): + if key in ("start_date", "end_date") and isinstance(value, str): + kwargs[key] = datetime.fromisoformat(value) + elif key == "dagrun_timeout_seconds": + kwargs["dagrun_timeout"] = timedelta(seconds=value) + elif key == "tags" and isinstance(value, list): + kwargs["tags"] = set(value) + elif key == "access_control" and isinstance(value, dict): + # Convert innermost lists → sets (permissions) + kwargs["access_control"] = { + role: { + resource: set(perms) if isinstance(perms, list) else perms + for resource, perms in resources.items() + } + for role, resources in value.items() + } + elif key == "params": + kwargs["params"] = value + else: + kwargs[key] = value + return kwargs + + +# --------------------------------------------------------------------------- +# JSON helper +# --------------------------------------------------------------------------- + + +def _make_json_safe(obj): + """Handle types that json.dumps cannot serialise natively.""" + if isinstance(obj, (set, frozenset)): + return sorted(obj) + if isinstance(obj, bytes): + return obj.decode("utf-8") + raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable") + + +# 
--------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> None: + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + yaml_path = Path(sys.argv[1]) + output_path = Path(sys.argv[2]) + + with open(yaml_path) as fh: + test_data = yaml.safe_load(fh) + + # Lazy-import Airflow so the script fails fast on missing args first. + from airflow.sdk import DAG + from airflow.serialization.serialized_objects import DagSerialization + + results: dict[str, dict] = {} + for case in test_data["test_cases"]: + name = case["name"] + kwargs = _yaml_params_to_dag_kwargs(case["params"]) + print(f" [{name}] ", end="") + try: + dag = DAG(**kwargs) + serialized = DagSerialization.serialize_dag(dag) + results[name] = serialized + print("OK") + except Exception as exc: + print(f"ERROR: {exc}") + results[name] = {"__error": str(exc)} + + with open(output_path, "w") as fh: + json.dump(results, fh, indent=2, sort_keys=True, default=_make_json_safe) + + print(f"\nWrote {len(results)} serialised DAGs → {output_path}") + + +if __name__ == "__main__": + main() diff --git a/java-sdk/validation/serialization/test_dags.yaml b/java-sdk/validation/serialization/test_dags.yaml new file mode 100644 index 0000000000000..f2eb014b60721 --- /dev/null +++ b/java-sdk/validation/serialization/test_dags.yaml @@ -0,0 +1,198 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Cross-language DAG serialization test cases. +# +# Each entry defines DAG constructor parameters in a language-agnostic way. +# Both the Python and Java serialization scripts read this file, construct +# DAGs from the params, and write the serialized output for comparison. +# +# Type conventions: +# start_date / end_date — ISO-8601 string, parsed to datetime / Instant +# dagrun_timeout_seconds — number of seconds, parsed to timedelta / Duration +# tags — list of strings, converted to set on both sides +# access_control — nested map; innermost lists become sets + +test_cases: + # ---- schedule variants ------------------------------------------------ + - name: "minimal_dag" + params: + dag_id: "example_dag" + + - name: "schedule_daily" + params: + dag_id: "example_dag" + schedule: "@daily" + + - name: "schedule_hourly" + params: + dag_id: "example_dag" + schedule: "@hourly" + + - name: "schedule_once" + params: + dag_id: "example_dag" + schedule: "@once" + + - name: "schedule_continuous" + params: + dag_id: "example_dag" + schedule: "@continuous" + max_active_runs: 1 + + - name: "schedule_cron" + params: + dag_id: "example_dag" + schedule: "0 0 * * *" + + - name: "schedule_cron_complex" + params: + dag_id: "example_dag" + schedule: "30 2 */3 * 1-5" + + # ---- simple scalar fields -------------------------------------------- + - name: "with_description" + params: + dag_id: "example_dag" + description: "This is an example DAG for testing serialization." 
+ + - name: "with_doc_md" + params: + dag_id: "example_dag" + doc_md: "# Example DAG\n\nThis is **markdown** documentation." + + - name: "with_dag_display_name" + params: + dag_id: "example_dag" + dag_display_name: "My Example Pipeline" + + - name: "with_dag_display_name_same_as_id" + params: + dag_id: "example_dag" + dag_display_name: "example_dag" + + # ---- boolean / numeric fields ----------------------------------------- + - name: "with_catchup_and_start_date" + params: + dag_id: "example_dag" + schedule: "@daily" + start_date: "2024-01-01T00:00:00Z" + catchup: true + + - name: "with_fail_fast" + params: + dag_id: "example_dag" + fail_fast: true + + - name: "with_render_template_as_native_obj" + params: + dag_id: "example_dag" + render_template_as_native_obj: true + + - name: "with_is_paused_upon_creation_true" + params: + dag_id: "example_dag" + is_paused_upon_creation: true + + - name: "with_is_paused_upon_creation_false" + params: + dag_id: "example_dag" + is_paused_upon_creation: false + + - name: "with_max_active_tasks" + params: + dag_id: "example_dag" + max_active_tasks: 32 + max_active_runs: 8 + max_consecutive_failed_dag_runs: 5 + + - name: "with_dagrun_timeout" + params: + dag_id: "example_dag" + dagrun_timeout_seconds: 3600 + + - name: "with_start_and_end_date" + params: + dag_id: "example_dag" + start_date: "2024-01-01T00:00:00Z" + end_date: "2024-12-31T23:59:59Z" + + # ---- collection fields ------------------------------------------------ + - name: "with_tags" + params: + dag_id: "example_dag" + tags: ["alpha", "beta", "gamma"] + + - name: "with_owner_links" + params: + dag_id: "example_dag" + owner_links: + data_team: "https://example.com/data-team" + dev_team: "https://example.com/dev-team" + + # ---- decorated / typed fields ----------------------------------------- + - name: "with_default_args" + params: + dag_id: "example_dag" + default_args: + retries: 3 + owner: "test_owner" + + - name: "with_params" + params: + dag_id: "example_dag" + 
params: + param_string: "value1" + param_int: 42 + param_bool: true + + - name: "with_access_control" + params: + dag_id: "example_dag" + access_control: + viewer_role: + DAGs: + - "can_read" + editor_role: + DAGs: + - "can_read" + - "can_edit" + + # ---- complex combined ------------------------------------------------- + - name: "complex_dag" + params: + dag_id: "complex_dag" + schedule: "0 */6 * * *" + description: "A complex DAG with many parameters" + start_date: "2024-06-15T10:30:00Z" + catchup: true + max_active_tasks: 32 + max_active_runs: 8 + max_consecutive_failed_dag_runs: 3 + dagrun_timeout_seconds: 7200 + tags: ["complex", "production", "etl"] + owner_links: + data_team: "https://example.com/data-team" + fail_fast: true + dag_display_name: "Complex ETL Pipeline" + doc_md: "# Complex Pipeline\nHandles ETL processing." + default_args: + retries: 2 + owner: "data_engineering" + params: + env: "production" + batch_size: 1000 From f3ba258594a0262cfa960301ccb1a1b4aba0f849 Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Thu, 23 Apr 2026 11:37:31 +0800 Subject: [PATCH 02/16] Rename language field as sdk in java-sdk --- java-sdk/dags/stub_dag.py | 4 ++-- .../src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java-sdk/dags/stub_dag.py b/java-sdk/dags/stub_dag.py index 65a0832e6a8d3..bd606e58e5976 100644 --- a/java-sdk/dags/stub_dag.py +++ b/java-sdk/dags/stub_dag.py @@ -27,11 +27,11 @@ def python_task_1(ti): ti.xcom_push(value="value-pushed-from-python_task_1", key="return_value") -@task.stub(language="java") +@task.stub(sdk="java") def extract(): ... -@task.stub(language="java") +@task.stub(sdk="java") def transform(): ... 
diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt index 21a398298662e..4f44b42b9bd6b 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt @@ -128,7 +128,7 @@ private fun Class.serialize( "task_id" to id, "task_type" to simpleName, "_task_module" to name.substringBeforeLast('.'), - "language" to "java", + "sdk" to "java", ) if (!dependants.isNullOrEmpty()) { data["downstream_task_ids"] = dependants.sorted() From 876179ab55d3b31486ee52f4c27abd4e215b0fd0 Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Tue, 28 Apr 2026 08:52:27 +0800 Subject: [PATCH 03/16] Add ADRs for JavaSDK and Coordinator --- .../adr/0001-java-sdk-airflow-integration.md | 343 +++++++++++++ java-sdk/adr/0002-dag-parsing.md | 370 ++++++++++++++ java-sdk/adr/0003-workload-execution.md | 473 ++++++++++++++++++ java-sdk/adr/0004-pure-java-dags.md | 228 +++++++++ 4 files changed, 1414 insertions(+) create mode 100644 java-sdk/adr/0001-java-sdk-airflow-integration.md create mode 100644 java-sdk/adr/0002-dag-parsing.md create mode 100644 java-sdk/adr/0003-workload-execution.md create mode 100644 java-sdk/adr/0004-pure-java-dags.md diff --git a/java-sdk/adr/0001-java-sdk-airflow-integration.md b/java-sdk/adr/0001-java-sdk-airflow-integration.md new file mode 100644 index 0000000000000..1609e99a413e2 --- /dev/null +++ b/java-sdk/adr/0001-java-sdk-airflow-integration.md @@ -0,0 +1,343 @@ + + +# ADR-0001: Java SDK Airflow Integration + +## Status + +Accepted + +## Context + +Airflow's current execution model is Python-only: DAGs are Python files, tasks are Python callables, and the task runner forks a Python process. 
To support DAGs and tasks authored in other languages (starting with Java), we need an architecture that: + +- Allows entire DAGs to be written in a non-Python language (pure Java DAG). +- Allows non-Python tasks to coexist with Python tasks in the same DAG (`@task.stub`). +- Reuses the existing task-runner two-layer design (task-runner process + forked child process) so Airflow extensions (XCom backends, connections, variables) stay in Python. +- Is extensible to other languages (Go, Rust, etc.) without per-language changes to Airflow Core. + +The existing task runner already uses a two-layer design. When an executor wants to run a task, it starts a task-runner process that talks to Airflow Core through the Execution API, and forks another process that talks to the task-runner through TCP to run the actual task code. All the Airflow extensions simply go into the task-runner process, keeping them in Python. + +The only thing missing is a way for the task-runner process to run tasks in another language. + +## Decision + +### Writing a Non-Python Task + +There is one way to write a non-Python task: implement the language SDK's task interface. For Java, this is the `Task` interface with a single `execute(Client client)` method. The `Client` provides access to Airflow services (connections, variables, XCom). + +### Two Ways to Integrate Non-Python Tasks into a DAG + +We provide two approaches for integrating non-Python tasks into a DAG: + +**a) Pure Java DAG** — define the entire DAG in Java, with no Python file at all. 
+The Java SDK provides `DagBundle`, `Dag`, and `Task` interfaces: + +```java +public class JavaExample implements DagBundle { + + public static class Extract implements Task { + public void execute(Client client) throws Exception { + var connection = client.getConnection("test_http"); + client.setXCom(new Date().getTime()); + } + } + + public static class Transform implements Task { + public void execute(Client client) { + var extract_xcom = client.getXCom("extract"); + client.setXCom(new Date().getTime()); + } + } + + @Override + public List<Dag> getDags() { + var dag = new Dag("java_example", null, "@daily"); + dag.addTask("extract", Extract.class, List.of()); + dag.addTask("transform", Transform.class, List.of("extract")); + return List.of(dag); + } +} +``` + +**b) `@task.stub` in a Python DAG** — for mixed-language pipelines where Python and +Java tasks coexist in the same DAG. The `@task.stub` syntax is already supported for +the Go SDK; the same pattern applies to Java: + +```python +@task() +def python_task_1(ti): + ti.xcom_push(value="from-python", key="return_value") + + +@task.stub(queue="java") +def extract(): ... + + +@task.stub(queue="java") +def transform(): ... + + +@dag(dag_id="java_example") +def simple_dag(): + python_task_1() >> extract() >> transform() +``` + +Both approaches are supported in parallel. A pure Java DAG needs no Python at all for authoring. A `@task.stub` DAG requires a Python file but lets you mix Python operators and non-Python tasks in a single pipeline. + +> **Note:** The current `DagBundle` interface used in pure Java DAGs is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. + +### The Coordinator Layer + +We introduce a **Coordinator** layer. When a DAG bundle is loaded, it not only tells Airflow how to find the DAGs (and tasks in them), but also how to *run* each task.
Current Python tasks use a Python code path that runs them by forking. A new **Java Coordinator** instructs the task runner how to run tasks in JAR files. + +The base interface (`BaseCoordinator`) lives in `airflow.sdk.execution_time` and is selected automatically via `ProvidersManagerTaskRuntime`. The Java Coordinator lives in a provider under the `airflow.providers.sdk.java` namespace, and new language coordinators follow the same pattern. + +### Architecture Overview + +``` + Airflow Backend Language Runtime Subprocess (Java in this example) + ─────────────── ────────────────────────────────────────────────── + + ┌──────────────────────────────┐ + │ DAG File (Python or JAR) │ + │ │ + │ @task.stub(queue="java") │ + │ def my_java_task(): │ + │ ... │ + └──────────────┬───────────────┘ + │ + ┌──────────────▼───────────────┐ ┌──────────────────────────────┐ + │ DAG File Processor │ │ Runtime Subprocess (Java) │ + │ │ can_handle_dag │ │ + │ For each file in bundle: │ _file() == True │ dag_parsing_cmd() │ + │ ┌ coordinator handles it? ──┼───────────────────►│ │ + │ │ Yes ──► delegate parse │ │ Java SDK parses JAR, builds │ + │ │ No ──► Python path │ SDK Serialized │ SDK-compatible Serialized │ + │ │ │◄─── DAG JSON ──────┤ DAG JSON (sdk, tasks, etc.) 
│ + │ └ │ │ │ + └──────────────┬───────────────┘ └──────────────────────────────┘ + │ + ┌──────────────▼───────────────┐ + │ Metadata DB │ + │ │ + │ serialized_dag: { │ Stored as-is from the language runtime's + │ "relative_fileloc": │ SDK Serialized DAG JSON + │ "path/to/example.jar" │ + │ } │ + │ task_instance.queue │ + └──────────────┬───────────────┘ + │ + ┌──────────────▼───────────────┐ + │ Scheduler │ + │ │ + │ Reads queue from TI │ + │ ──► ExecuteTask workload │ + │ (includes queue) │ + └──────────────┬───────────────┘ + │ + ┌──────────────▼───────────────┐ ┌──────────────────────────────┐ + │ Execution API │ │ Runtime Subprocess (Java) │ + │ │ │ │ + │ TI.queue ──► Startup │ │ task_execution_cmd() │ + │ Details │ │ Executes task in JVM │ + └──────────────┬───────────────┘ │ │ + │ └──────────────▲───────────────┘ + ┌──────────────▼───────────────┐ │ + │ Task Runner │ │ + │ │ │ + │ QueueToCoordinatorMapper │ │ + │ maps queue via `[sdk] │ │ + │ queue_to_sdk` config ───────┼───────────────────────────────────┘ + │ to matching coordinator │ + └──────────────────────────────┘ +``` + +### The `BaseCoordinator` Interface + +This is the central abstraction that language providers implement. It lives in the Task SDK (`task-sdk/src/airflow/sdk/execution_time/coordinator.py`) and handles both DAG parsing and task execution for a specific language runtime. + +```python +class BaseCoordinator: + """ + Base coordinator for runtime-specific DAG file processing and task execution. + + Providers register subclasses in their ``provider.yaml`` under + ``coordinators``. Both ProvidersManager (airflow-core) and + ProvidersManagerTaskRuntime (task-sdk) discover coordinators through + this extension point. + + Subclasses represent a specific language runtime (Java, Go, etc.) and + implement three methods. The base class owns the full bridge lifecycle: + TCP servers, subprocess management, selector-based I/O loop, and cleanup. + """ + + sdk: str # e.g. 
"java", "go" — matches sdk field on operator/TI + + # Discovery (called by DAG File Processor) + + @classmethod + def can_handle_dag_file(cls, bundle_name: str, path: str | os.PathLike) -> bool: + """Return True if this coordinator should parse the file at *path*.""" + ... + + @classmethod + def get_code_from_file(cls, fileloc: str) -> str: + """Return the actual DAG code (the content of JavaExample.java in this case""" + ... + + # DAG Parsing (called in forked DagFileProcessor child process) + + @classmethod + def dag_parsing_cmd( + cls, + *, + dag_file_path: str, # Absolute path to DAG file + bundle_name: str, # Name of the DAG bundle + bundle_path: str, # Root path of the bundle + comm_addr: str, # host:port for msgpack comm channel + logs_addr: str, # host:port for structured JSON log channel + ) -> list[str]: + """Return the subprocess command for DAG file parsing.""" + ... + + # Task Execution (called in forked worker child process) + + @classmethod + def task_execution_cmd( + cls, + *, + what: TaskInstance, + dag_rel_path: str | os.PathLike, # Relative path to DAG file within bundle + bundle_info: BundleInfo, + comm_addr: str, + logs_addr: str, + ) -> list[str]: + """Return the subprocess command for task execution.""" + ... + + # Lifecycle (owned by base class, not overridden) + + @classmethod + def run_dag_parsing(cls, *, path, bundle_name, bundle_path) -> None: ... + + @classmethod + def run_task_execution(cls, *, what, dag_rel_path, bundle_info, startup_details) -> None: ... +``` + +### Provider Registration + +Language providers register their coordinators in `provider.yaml`: + +```yaml +# providers/sdk/java/provider.yaml +process-coordinators: + - airflow.providers.sdk.java.coordinator.JavaCoordinator +``` + +### Example: `JavaCoordinator` + +```python +class JavaCoordinator(BaseCoordinator): + sdk = "java" + + @classmethod + def can_handle_dag_file(cls, bundle_name, path): + """True when path is a JAR with a Main-Class manifest entry.""" + ... 
+ + @classmethod + def dag_parsing_cmd(cls, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): + main_class = find_main_class(Path(dag_file_path)) + return [ + "java", + "-classpath", + f"{bundle_path}/*", + main_class, + f"--comm={comm_addr}", + f"--logs={logs_addr}", + ] + + @classmethod + def task_execution_cmd(cls, *, what, dag_rel_path, bundle_info, comm_addr, logs_addr): + jar_path = Path(dag_rel_path) + main_class = find_main_class(jar_path) + return [ + "java", + "-classpath", + f"{jar_path.parent}/*", + main_class, + f"--comm={comm_addr}", + f"--logs={logs_addr}", + ] +``` + +### Integration Points — Required Changes + +**1. Decorator — DAG Author Interface** + +DAG authors declare a non-Python task using `@task.stub` and specify a queue: + +```python +@task.stub(queue="java") +def my_java_task(): ... +``` + +**2. Serialization — Each Language SDK Produces SDK-Compatible Serialized DAG JSON** + +Serialization is the language runtime's responsibility, not Airflow Core's. Each language SDK implements its own serializer that understands the language-specific DAG and task structure and produces a Task SDK-compatible Serialized DAG JSON — the same schema that the Python SDK's `SerializedDAG` produces. + +The language runtime subprocess returns this JSON to the DAG File Processor through the msgpack comm channel. The DAG File Processor and Airflow Core treat it identically to Python-serialized DAGs — it is stored as-is in the metadata DB. + +We have already added compatibility validation between the Python SDK and Java SDK serialized DAG JSON formats to ensure both produce structurally equivalent output. + +**3. Execution API — Task Queues Routed to the Worker** + +A new configuration is added to map each task's `queue` to a language runtime: + +```ini +[sdk] +queue_to_sdk = {"java": "java"} +``` + +This specifies tasks in the `java` queue should be routed to `JavaCoordinator` since it has `sdk = "java"`. 
+ +## Consequences + +### New Interfaces + +| Component | New Interface | Change Type | +|-----------|--------------|-------------| +| `BaseCoordinator` | Abstract base defined in Task SDK | New class | +| `process-coordinators` | Provider extension point in `provider.yaml` | New extension point | +| `@task.stub` decorator | `queue: str \| None` parameter | Additive | +| `[sdk] queue_to_sdk` | Airflow configuration | New option | +| `_resolve_runtime_entrypoint` | Route by `queue` → `sdk` match | Behavioral | + +### What Becomes Easier + +- Adding a new language runtime requires only a `BaseCoordinator` subclass, a language SDK, and a `provider.yaml` entry — no changes to Airflow Core. +- DAG authors can mix Python and non-Python tasks in the same pipeline. +- The existing task-runner two-layer design is preserved, keeping all Airflow extensions in Python. + +### What Becomes Harder + +- Each language SDK must independently produce compatible serialized DAG JSON, which requires cross-language validation infrastructure. +- The coordinator subprocess bridge adds a TCP hop and process management overhead per non-Python task. +- Debugging non-Python tasks requires understanding the bridge layer between the task runner and the language runtime. diff --git a/java-sdk/adr/0002-dag-parsing.md b/java-sdk/adr/0002-dag-parsing.md new file mode 100644 index 0000000000000..235658d5460bd --- /dev/null +++ b/java-sdk/adr/0002-dag-parsing.md @@ -0,0 +1,370 @@ + + +# ADR-0002: DAG Parsing — Language-Specific DAG File Processing + +## Status + +Accepted + +## Context + +Airflow's standard DAG file processor only understands Python files. To support DAGs defined in other languages (Java, Go, Rust, etc.), the pipeline needs an extension point where a language-specific processor can intercept the parsing request, delegate to an external runtime, and return a result in the same format the Airflow scheduler expects.
+ +This ADR details the DAG parsing side of the coordinator architecture described in [ADR-0001](0001-java-sdk-airflow-integration.md). It starts with the generic model — the abstract contracts and expected behavior that any language must implement — then walks through Java as a concrete example. + +## Decision + +### Extension Point: `BaseCoordinator` + +A single abstract base class — `BaseCoordinator` — handles both DAG parsing and task execution. It is registered in `provider.yaml` under `coordinators`. For DAG parsing, a subclass must implement two methods: + +| Method | Signature | Responsibility | +|---|---|---| +| `can_handle_dag_file` | `(bundle_name, path) -> bool` | Return `True` if this coordinator should handle the given file. Default returns `False`; subclasses add language-specific checks (e.g., "is this a JAR with a Main-Class?"). | +| `dag_parsing_cmd` | `(dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr) -> list[str]` | Return the full command to launch the language runtime. `comm_addr` and `logs_addr` are `host:port` strings the process must connect to. | + +### Registration + +In the provider's `provider.yaml`: + +```yaml +process-coordinators: + - airflow.providers.sdk.<language>.coordinator.<Language>Coordinator +``` + +A single registration covers both DAG parsing and task execution — there are no separate `dag-file-processors` or `task-coordinators` keys. + +### Discovery: `_resolve_processor_target()` + +When `DagFileProcessorProcess.start()` needs to parse a file: + +``` +_resolve_processor_target(path, bundle_name, bundle_path) + for each class_path in ProvidersManager().coordinators: + coordinator_cls = import_string(class_path) + if coordinator_cls.can_handle_dag_file(bundle_name, path): + return functools.partial(coordinator_cls.run_dag_parsing, path=..., bundle_name=..., bundle_path=...) + return None # fall back to default Python parser +``` + +The first coordinator whose `can_handle_dag_file()` returns `True` wins.
If none match, the default Python `_parse_file_entrypoint` runs. + +### What the Base Class Handles Automatically + +The matched coordinator's `run_dag_parsing()` (a concrete method on `BaseCoordinator`) delegates to `_runtime_subprocess_entrypoint()`, which handles all the TCP/process plumbing: + +1. Creates two TCP servers on `127.0.0.1` with random ports (comm + logs) +2. Creates a stderr socketpair +3. Calls `dag_parsing_cmd()` to get the command +4. Spawns the subprocess with `stdin=DEVNULL` (does NOT inherit fd 0) +5. Accepts TCP connections from the subprocess +6. Wraps fd 0 as `supervisor_comm` via `os.dup(0)` +7. Runs `_bridge()` — a raw byte forwarder between fd 0 and the TCP comm socket + +### Expected E2E Flow + +``` +Airflow Dag-Processor + │ + ▼ +DagFileProcessorProcess.start(path, bundle_name, bundle_path) + │ + ├─ _resolve_processor_target() + │ └─ iterates process-coordinators from provider.yaml + │ └─ first can_handle_dag_file() == True wins + │ + ▼ +WatchedSubprocess.start(target=coordinator.run_dag_parsing) + │ + [fork — child process gets fd 0 as Unix domain socket to supervisor] + │ + ▼ (in child) +Coordinator.run_dag_parsing(path, bundle_name, bundle_path) + │ + ▼ +BaseCoordinator._runtime_subprocess_entrypoint(DagParsingInfo) + │ + ├─ 1. Create TCP comm_server + logs_server on 127.0.0.1:random + ├─ 2. Create stderr socketpair + ├─ 3. Call dag_parsing_cmd() → get launch command + ├─ 4. Popen(cmd, stdin=DEVNULL, stderr=child_stderr) + ├─ 5. Accept TCP connections from the language runtime + ├─ 6. supervisor_comm = socket(fileno=os.dup(0)) + └─ 7. 
_bridge() — raw byte forwarding until process exits +``` + +### Expected Message Sequence + +Once the bridge is running, the Airflow supervisor and the language runtime communicate directly through the bridge (raw bytes, no re-encoding): + +``` +Airflow Supervisor Bridge Language Runtime + │ │ │ + ├── DagFileParseRequest ──────────┼──────────────────────►│ + │ [4-byte len][msgpack frame] │ raw byte forward │ + │ │ │ + │ │ ├── parse DAGs from + │ │ │ bundle/file + │ │ │ + │◄── DagFileParsingResult ────────┼───────────────────────┤ + │ [4-byte len][msgpack frame] │ raw byte forward │ + │ │ │ + │ │ └── exit(0) + │ │ + │ └── drain remaining bytes (5s deadline) + │ close all sockets +``` + +### DagFileParsingResult Format + +The language runtime must produce a `DagFileParsingResult` that matches Python Airflow's DagSerialization format exactly. The Airflow scheduler deserializes this into its internal model — any divergence causes parsing failures. + +**Envelope:** + +``` +{ + "type": "DagFileParsingResult", + "fileloc": "", + "serialized_dags": [ + { + "data": { + "__version": 3, + "dag": { } + } + }, + ... 
+ ] +} +``` + +**Serialized DAG structure** (version 3): + +| Field | Type | Required | Description | +|---|---|---|---| +| `dag_id` | string | yes | Unique identifier | +| `fileloc` | string | yes | Source file path (can be empty) | +| `relative_fileloc` | string | yes | Relative source path (can be empty) | +| `timezone` | string | yes | Always `"UTC"` | +| `timetable` | `{__type, __var}` | yes | Schedule timetable (see below) | +| `tasks` | list | yes | Serialized task list | +| `dag_dependencies` | list | yes | Empty list for non-Python DAGs | +| `task_group` | map | yes | Flat root task group | +| `edge_info` | map | yes | Empty map | +| `params` | list | yes | DAG-level parameters | +| `description` | string | if set | | +| `start_date` | float (epoch) | if set | Unwrapped from `__type`/`__var` | +| `end_date` | float (epoch) | if set | Unwrapped from `__type`/`__var` | +| `tags` | list | if non-empty | Unwrapped from `__type`/`__var` | +| `catchup` | bool | if `true` | | +| `max_active_tasks` | int | if non-default | | +| `max_active_runs` | int | if non-default | | + +**Timetable encoding:** + +| Schedule | `__type` | `__var` | +|---|---|---| +| `null` | `airflow.timetables.simple.NullTimetable` | `{}` | +| `@once` | `airflow.timetables.simple.OnceTimetable` | `{}` | +| `@continuous` | `airflow.timetables.simple.ContinuousTimetable` | `{}` | +| cron expr | `airflow.timetables.trigger.CronTriggerTimetable` | `{expression, timezone, interval, run_immediately}` | + +**Task encoding:** + +``` +{ + "__type": "operator", + "__var": { + "task_id": "<task_id>", + "task_type": "<task_type>", + "_task_module": "<task_module>", + "downstream_task_ids": ["<task_id>", ...] // only if non-empty + } +} +``` + +**Value type encoding** (for complex fields): + +| Type | Encoding | +|---|---| +| datetime | `{"__type": "datetime", "__var": <epoch_seconds>}` | +| timedelta | `{"__type": "timedelta", "__var": <total_seconds>}` | +| dict | `{"__type": "dict", "__var": {k: serialize(v), ...}}` | +| set | `{"__type": "set", "__var": [sorted_items]}` | +|
list | `[serialize(item), ...]` (no wrapper) | +| primitives | pass through unchanged | + +**Non-decorated vs decorated fields:** Some fields (like `start_date`, `end_date`, `tags`) are "non-decorated" — they are serialized with `__type`/`__var` wrapping but then unwrapped to just the `__var` value before inclusion in the DAG dict. Other fields (like `default_args`, `access_control`) are "decorated" — they keep the `__type`/`__var` wrapper. This matches Python's `serialize_to_json` behavior. + +### What a Language Provider Must Implement + +For DAG parsing, a new language provider needs: + +1. **A `BaseCoordinator` subclass** with: + - `can_handle_dag_file()` — language-specific file detection (e.g., "is this a JAR?", "is this a .go file?") + - `dag_parsing_cmd()` — returns the command to launch the runtime + +2. **A runtime process** that: + - Accepts `--comm=host:port` and `--logs=host:port` CLI arguments + - Connects to both TCP addresses + - Reads a `DagFileParseRequest` msgpack frame from the comm channel + - Parses the DAGs from the bundle + - Serializes the result to DagSerialization v3 format + - Sends back a `DagFileParsingResult` msgpack frame + - Exits + +3. 
**Registration** in `provider.yaml` under `process-coordinators` + +### Java as a Concrete Example + +**JavaCoordinator:** + +The Java provider implements all DAG-parsing contracts in a single `BaseCoordinator` subclass: + +```python +# providers/sdk/java/coordinator.py +class JavaCoordinator(BaseCoordinator): + sdk = "java" + + @classmethod + def can_handle_dag_file(cls, bundle_name, path) -> bool: + # Returns True when path is a JAR with a Main-Class manifest entry + with contextlib.suppress(FileNotFoundError): + return find_main_class(Path(path)) is not None + return False + + @classmethod + def dag_parsing_cmd(cls, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): + main_class = find_main_class(Path(dag_file_path)) + return [ + "java", + "-classpath", + f"{bundle_path}/*", + main_class, + f"--comm={comm_addr}", + f"--logs={logs_addr}", + ] +``` + +`can_handle_dag_file()` checks that the file is a JAR with a `Main-Class` in its manifest. This ensures the coordinator only claims files it can actually handle. + +The classpath is `<bundle_path>/*` — a wildcard that includes all JARs in the bundle directory (the application JAR plus its dependencies). + +No separate `JavaDagFileProcessor` class is needed — `BaseCoordinator` consolidates file detection, DAG parsing, and task execution into a single extension point. + +**Java SDK Bundle Process:** + +The Java bundle process (`Server.kt`) starts, connects to both TCP servers, and enters `CoordinatorComm.startProcessing()`.
When it receives a `DagFileParseRequest`: + +``` +CoordinatorComm.handleIncoming(frame) + │ + ├── frame.body is DagFileParseRequest + │ file: String ← the path from the request + │ + ▼ +DagParser(request.file).parse(bundle) + │ + ├── Returns DagParsingResult(fileloc=file, dags=bundle.dags) + │ The DAGs were already loaded into the Bundle at startup + │ via DagBundle.getDags() + │ + ▼ +sendMessage(frame.id, result) + │ + ├── CoordinatorComm.encode(OutgoingFrame(id, result)) + │ ├── detects DagParsingResult type + │ └── calls result.serialize() ← Serde.kt + │ + ├── DagParsingResult.serialize() + │ ├── Wraps each DAG: {"data": {"__version": 3, "dag": dag.serialize(id)}} + │ ├── Dag.serialize() produces the full v3 format: + │ │ timetable, tasks, task_group, params, optional fields... + │ ├── Task.serialize() wraps as {"__type": "operator", "__var": {...}} + │ └── serializeValue() handles datetime/timedelta/dict/set encoding + │ + ├── TaskSdkFrames.encodeRequest(id, serializedMap) + │ ├── Converts map to msgpack: [id, body] + │ └── Returns byte array + │ + └── Writes [4-byte length prefix][msgpack payload] to comm channel + +shutDownRequested = true ← one-shot, process will exit +``` + +**Java SDK DagBundle Interface:** + +Bundle authors implement `DagBundle` to define their DAGs: + +```java +public class JavaExample implements DagBundle { + @Override + public List<Dag> getDags() { + var dag = new Dag("java_example", null, "@daily"); + dag.addTask("extract", Extract.class, List.of()); + dag.addTask("transform", Transform.class, List.of("extract")); + dag.addTask("load", Load.class, List.of("transform")); + return List.of(dag); + } + + public static void main(String[] args) { + var example = new JavaExample(); + var bundle = new Bundle( + JavaExample.class.getPackage().getImplementationVersion(), + example.getDags() + ); + Server.create(args).serve(bundle); + } +} +``` + +The `Dag` class provides a fluent API: + +- `dagId`, `description`, `schedule` (cron or preset),
`startDate`, `endDate`, and all standard Airflow DAG parameters +- `addTask(id, taskClass, dependsOn)` — registers a task and its upstream dependencies +- Dependencies are stored as a `dependants` map (parent → set of children), serialized as `downstream_task_ids` + +**Java SDK Serialization Compatibility:** + +The serialization in `Serde.kt` is validated against Python's output: + +```bash +# 1. Java generates serialized output +./gradlew sdk:test +# → writes validation/serialization/serialized_java.json + +# 2. Python generates the same DAGs +uv run validation/serialization/serialize_python.py \ + validation/serialization/test_dags.yaml \ + validation/serialization/serialized_python.json + +# 3. Field-by-field comparison +uv run validation/serialization/compare.py \ + validation/serialization/serialized_python.json \ + validation/serialization/serialized_java.json +``` + +Both sides share the test cases defined in `test_dags.yaml`, so the field-by-field comparison verifies that the Java SDK's serialized output matches Python's `DagSerialization.serialize_dag()` for the same inputs. + +## Consequences + +- The DAG file processor can be extended to any language without modifying Airflow Core — only a provider with a `BaseCoordinator` subclass is needed. +- The language runtime must produce exact DagSerialization v3 JSON, requiring cross-language validation infrastructure (e.g., `test_dags.yaml` + `compare.py`). +- The base class absorbs all TCP/process plumbing, so language providers only implement two methods for DAG parsing. +- The subprocess bridge adds latency and a process boundary; DAG parsing for non-Python files is inherently slower than in-process Python parsing.
diff --git a/java-sdk/adr/0003-workload-execution.md b/java-sdk/adr/0003-workload-execution.md new file mode 100644 index 0000000000000..afa90a16e9e71 --- /dev/null +++ b/java-sdk/adr/0003-workload-execution.md @@ -0,0 +1,473 @@ + + +# ADR-0003: Workload Execution — Language-Specific Task Execution + +## Status + +Accepted + +## Context + +Airflow's standard task runner executes Python callables. To support tasks written in other languages, the pipeline needs an extension point where a language-specific coordinator can intercept the execution, delegate to an external runtime process, and bridge the Task SDK protocol so the external process can access Airflow services (connections, variables, XCom) during execution. + +This ADR details the task execution side of the coordinator architecture described in [ADR-0001](0001-java-sdk-airflow-integration.md). It starts with the generic model — the abstract contracts and expected behavior that any language must implement — then walks through Java as a concrete example. + +## Decision + +### Extension Point: `BaseCoordinator` + +The same `BaseCoordinator` base class that handles DAG parsing also handles task execution. It is registered in `provider.yaml` under `coordinators`. For task execution, a subclass must implement: + +| Method | Signature | Responsibility | +|---|---|---| +| `task_execution_cmd` | `(what, dag_rel_path, bundle_info, comm_addr, logs_addr) -> list[str]` | Return the full command to launch the language runtime for task execution. `comm_addr` and `logs_addr` are `host:port` strings the process must connect to. | + +The base class provides `run_task_execution()` as a concrete method that handles all TCP/process plumbing automatically (same pattern as `run_dag_parsing()` for the DAG parsing side). + +**Parameters passed to `run_task_execution()`:** + +| Parameter | Type | Description | +|---|---|---| +| `what` | `TaskInstance` | The task instance to execute (id, dag_id, task_id, run_id, try_number, etc.) 
| +| `dag_rel_path` | `str \| PathLike` | Relative path to the DAG file / bundle within the bundle root | +| `bundle_info` | `BundleInfo` | Bundle name and version | +| `startup_details` | `StartupDetails` | Full startup context (task instance, DAG rel path, bundle info, run context, start date) — already consumed from fd 0 | + +### Registration + +The same `coordinators` entry in `provider.yaml` covers both DAG parsing and task execution — no separate registration needed: + +```yaml +coordinators: + - airflow.providers.sdk.<language>.coordinator.<Language>Coordinator +``` + +### Discovery: `_resolve_runtime_entrypoint()` + +When `task_runner.main()` starts, before any Python task execution: + +``` +task_runner.main() + → startup_details = get_startup_details() # reads from fd 0 + → _resolve_runtime_entrypoint(startup_details) + for each class_path in ProvidersManagerTaskRuntime().process_coordinators: + coordinator_cls = import_string(class_path) + if not hasattr(coordinator_cls, "run_task_execution"): + continue + return functools.partial(coordinator_cls.run_task_execution, + what=..., dag_rel_path=..., bundle_info=..., startup_details=...) + return None # fall back to default Python execution + + → if runtime_entrypoint is not None: + runtime_entrypoint() # language-specific execution + return # short-circuit — skip Python execution entirely +``` + +> **Note:** Currently the first coordinator with `run_task_execution` wins. `QueueToCoordinatorMapper` maps the task's `queue` to the correct coordinator via the `[sdk] queue_to_sdk` configuration.
+ +### Expected E2E Flow + +``` +Airflow Executor (dispatches task) + │ + ▼ +WatchedSubprocess.start(target=task_runner.main) + │ + [fork — child process gets fd 0 as Unix domain socket to supervisor] + │ + ▼ (in child) +task_runner.main() + │ + ├─ get_startup_details() ← reads StartupDetails from fd 0 + │ + ├─ _resolve_runtime_entrypoint() + │ └─ iterates coordinators from provider.yaml + │ └─ first with run_task_execution wins + │ + ▼ +Coordinator.run_task_execution(what, dag_rel_path, bundle_info, startup_details) + │ + ▼ +BaseCoordinator._runtime_subprocess_entrypoint(TaskExecutionInfo) + │ + ├─ 1. Create TCP comm_server + logs_server on 127.0.0.1:random + ├─ 2. Create stderr socketpair + ├─ 3. Call task_execution_cmd() → get launch command + ├─ 4. Popen(cmd, stdin=DEVNULL, stderr=child_stderr) + ├─ 5. Accept TCP connections from the language runtime + ├─ 6. _send_startup_details(runtime_comm, startup_details) + │ └─ re-serializes with model_dump(mode="json") to avoid + │ msgpack extension types non-Python decoders can't handle + ├─ 7. supervisor_comm = socket(fileno=os.dup(0)) + └─ 8. _bridge() — raw byte forwarding until process exits +``` + +Key difference from DAG parsing: In task execution, `task_runner.main()` has already consumed `StartupDetails` from fd 0. The bridge must re-send `StartupDetails` to the language runtime over TCP before starting the byte-forwarding bridge. This is done via `_send_startup_details()`, which re-serializes using JSON mode to avoid msgpack extension types (like `Timestamp`) that non-Python decoders may not support. 
+ +### Expected Message Sequence + +Task execution is a multi-round conversation, unlike DAG parsing's single request/response: + +``` +Airflow Supervisor Bridge Language Runtime + │ │ │ + │ [StartupDetails sent by bridge directly] │ + │ ├── StartupDetails ────►│ + │ │ │ + │ │ ├── Look up task + │ │ │ from bundle + │ │ │ + │ │ ┌───────────────────┤ + │ │ │ Task code runs │ + │ │ │ and may request: │ + │ │ │ │ + │◄── GetConnection(conn_id) ──────┼───┤ │ + │ │ │ │ + ├── ConnectionResult ─────────────┼──►│ │ + │ │ │ │ + │◄── GetVariable(key) ────────────┼───┤ │ + │ │ │ │ + ├── VariableResult ───────────────┼──►│ │ + │ │ │ │ + │◄── GetXCom(key, dag_id, ...) ───┼───┤ │ + │ │ │ │ + ├── XComResult ───────────────────┼──►│ │ + │ │ │ │ + │◄── SetXCom(key, value, ...) ────┼───┤ │ + │ │ │ │ + ├── (empty response) ─────────────┼──►│ │ + │ │ │ │ + │ │ └───────────────────┤ + │ │ │ + │◄── SucceedTask / TaskState ─────┼───────────────────────┤ + │ (terminal — no response) │ │ + │ │ └── exit(0) + │ │ + │ └── drain, close sockets +``` + +### Task SDK Protocol Messages + +The language runtime exchanges these message types with the Airflow supervisor: + +**Runtime → Supervisor (requests):** + +| Message | Fields | Purpose | +|---|---|---| +| `GetConnection` | `conn_id` | Fetch an Airflow connection by ID | +| `GetVariable` | `key` | Fetch an Airflow variable by key | +| `GetXCom` | `key`, `dag_id`, `task_id`, `run_id`, `map_index?`, `include_prior_dates?` | Fetch an XCom value | +| `SetXCom` | `key`, `value`, `dag_id`, `task_id`, `run_id`, `map_index`, `mapped_length?` | Store an XCom value | +| `SucceedTask` | `end_date`, `task_outlets?`, `outlet_events?` | Terminal: task succeeded | +| `TaskState` | `state` (`"failed"`, `"removed"`, `"skipped"`), `end_date` | Terminal: task ended non-successfully | + +**Supervisor → Runtime (responses):** + +| Message | Fields | In response to | +|---|---|---| +| `ConnectionResult` | `conn_id`, `conn_type`, `host`, `schema`, `login`, `password`, 
`port`, `extra` | `GetConnection` | +| `VariableResult` | `key`, `value` | `GetVariable` | +| `XComResult` | `key`, `value` | `GetXCom` | +| (empty) | | `SetXCom` | +| `ErrorResponse` | `error`, `detail` | Any request that failed server-side | + +**Framing:** Every message is a length-prefixed msgpack frame. Requests are `[id, body]` (2-element array); responses are `[id, body, error]` (3-element array). The `id` field correlates request/response pairs. + +### Request/Response Semantics + +The task execution follows a synchronous request/response pattern from the runtime's perspective: + +1. The runtime sends a request frame (e.g., `GetVariable`) with an incrementing `id` +2. The supervisor reads the frame, fulfills the request (e.g., calls the Execution API), and sends back a response with the same `id` +3. The runtime blocks until it receives the response +4. This repeats for each Airflow service call the task code makes +5. When the task finishes, the runtime sends a terminal message (`SucceedTask` or `TaskState`) — no response is expected, and the process exits + +### StartupDetails + +The first message the runtime receives is `StartupDetails`, which provides full context for the task: + +| Field | Type | Description | +|---|---|---| +| `ti` | `TaskInstance` | id, task_id, dag_id, run_id, try_number, dag_version_id, map_index, context_carrier | +| `dag_rel_path` | string | Relative path to the DAG file / bundle | +| `bundle_info` | `BundleInfo` | name, version | +| `start_date` | datetime | When this task attempt started | +| `ti_context` | `TIRunContext` | DAG run context (logical date, data interval, etc.) | +| `sentry_integration` | string | Sentry DSN for error reporting (optional) | + +### What a Language Provider Must Implement + +For task execution, a new language provider needs: + +1. 
**A `BaseCoordinator` subclass** with: + - `task_execution_cmd()` — returns the command to launch the runtime + - (This is the same subclass that implements `can_handle_dag_file()` and `dag_parsing_cmd()` for DAG parsing — one class covers both) + +2. **A runtime process** that: + - Accepts `--comm=host:port` and `--logs=host:port` CLI arguments + - Connects to both TCP addresses + - Reads a `StartupDetails` msgpack frame from the comm channel + - Looks up the task to execute from its bundle using `ti.dag_id` and `ti.task_id` + - Executes the task, making `GetConnection`/`GetVariable`/`GetXCom`/`SetXCom` requests as needed + - Sends `SucceedTask` on success or `TaskState("failed")` on failure + - Exits + +3. **A task interface** that user code implements (analogous to Python's `@task` decorator or `BaseOperator`) + +4. **A client API** that wraps the socket protocol behind a simple interface (get_connection, get_variable, get_xcom, set_xcom) so task authors don't deal with framing + +5. 
**Registration** in `provider.yaml` under `coordinators` (same entry as DAG parsing — no separate registration) + +### Java as a Concrete Example + +**JavaCoordinator (Python side):** + +The same `JavaCoordinator` that handles DAG parsing also handles task execution — no separate `JavaTaskCoordinator` class is needed: + +```python +# providers/sdk/java/coordinator.py +class JavaCoordinator(BaseCoordinator): + sdk = "java" + + @classmethod + def can_handle_dag_file(cls, bundle_name, path) -> bool: + with contextlib.suppress(FileNotFoundError): + return find_main_class(Path(path)) is not None + return False + + @classmethod + def dag_parsing_cmd(cls, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): + main_class = find_main_class(Path(dag_file_path)) + return [ + "java", + "-classpath", + f"{bundle_path}/*", + main_class, + f"--comm={comm_addr}", + f"--logs={logs_addr}", + ] + + @classmethod + def task_execution_cmd(cls, *, what, dag_rel_path, bundle_info, comm_addr, logs_addr): + jar_path = Path(dag_rel_path) + main_class = find_main_class(jar_path) + return [ + "java", + "-classpath", + f"{jar_path.parent}/*", + main_class, + f"--comm={comm_addr}", + f"--logs={logs_addr}", + ] +``` + +One class, one `provider.yaml` entry, covers both DAG parsing and task execution. 
+ +**Java SDK Task Interface:** + +User task code implements a single-method interface: + +```java +// sdk: org.apache.airflow.sdk.Task +public interface Task { + void execute(Client client) throws Exception; +} +``` + +The `Client` provides access to Airflow services: + +```java +// sdk: org.apache.airflow.sdk.Client +public class Client { + // Access task metadata + public StartupDetails getDetails(); + + // Airflow services + public Connection getConnection(String id); + public Object getVariable(String key); + public Object getXCom(String key, String dagId, String taskId, String runId, ...); + public void setXCom(String key, Object value); // defaults: key="return_value", dagId/taskId/runId from current task +} +``` + +**Java SDK Task Execution Flow:** + +When the bundle process receives `StartupDetails`: + +``` +CoordinatorComm.handleIncoming(frame) + │ + ├── frame.body is StartupDetails + │ ti: TaskInstance (id, dagId, taskId, runId, tryNumber, ...) + │ dagRelPath, bundleInfo, startDate, tiContext + │ + ▼ +TaskRunner.run(bundle, request, comm) + │ + ├── Create Client(request, CoordinatorClient(comm)) + │ CoordinatorClient wraps the comm channel behind the Client interface + │ + ├── Look up task class: + │ bundle.dags[request.ti.dagId]?.tasks[request.ti.taskId] + │ └── if not found → return TaskState("removed") + │ + ├── Instantiate task: + │ task.getDeclaredConstructor().newInstance() + │ + ├── Execute: + │ try { + │ instance.execute(client) ← USER TASK CODE RUNS HERE + │ return SucceedTask() + │ } catch (Exception e) { + │ return TaskState("failed") + │ } + │ + ▼ +sendMessage(frame.id, result) ← sends SucceedTask or TaskState back +shutDownRequested = true ← one-shot, process will exit +``` + +**Java SDK Airflow Service Access:** + +When user task code calls `client.getVariable("my_key")`, the call chain is: + +``` +client.getVariable("my_key") // Client.kt (public SDK) + │ + └── impl.getVariable("my_key") // CoordinatorClient (execution) + │ + └── 
runBlocking { // blocks the calling thread + comm.communicate( // CoordinatorComm + GetVariable(key = "my_key") + ) + } + │ + ├── sendMessage(nextId++, GetVariable) // encode + write to comm socket + │ ├── encode: [id, {"type": "GetVariable", "key": "my_key"}] + │ └── write: [4-byte len][msgpack] + │ + ├── processOnce(::handle) // block until response arrives + │ ├── read 4-byte length prefix + │ ├── read payload + │ └── decode: [id, {"type": "VariableResult", ...}, null] + │ + └── return response.value // unwrap VariableResponse +``` + +This is fully synchronous from the task code's perspective — `getVariable()` blocks until the supervisor responds. + +**Java SDK Example Task Implementation:** + +```java +public static class Extract implements Task { + public void execute(Client client) throws Exception { + // Read XCom from a Python task in the same DAG + var pythonXcom = client.getXCom("python_task_1"); + + // Access Airflow connections + var connection = client.getConnection("test_http"); + + // Do work... 
+ Thread.sleep(6000); + + // Push XCom for downstream tasks (Java or Python) + client.setXCom(new Date().getTime()); + } +} + +public static class Transform implements Task { + public void execute(Client client) { + // Read XCom from upstream Java task + var extractXcom = client.getXCom("extract"); + + // Access Airflow variables + var variable = client.getVariable("my_variable"); + + // Push XCom (readable by downstream Python tasks) + client.setXCom(new Date().getTime()); + } +} + +public static class Load implements Task { + public void execute(Client client) { + var xcom = client.getXCom("transform"); + throw new RuntimeException("I failed"); + // Exception → TaskRunner catches → sends TaskState("failed") + } +} +``` + +**Java SDK Complete Bundle Entry Point:** + +```java +public class JavaExample implements DagBundle { + @Override + public List<Dag> getDags() { + var dag = new Dag("java_example", null, "@daily"); + dag.addTask("extract", Extract.class, List.of()); + dag.addTask("transform", Transform.class, List.of("extract")); + dag.addTask("load", Load.class, List.of("transform")); + return List.of(dag); + } + + public static void main(String[] args) { + var bundle = new Bundle( + JavaExample.class.getPackage().getImplementationVersion(), + new JavaExample().getDags() + ); + Server.create(args).serve(bundle); // parses --comm/--logs, connects, enters message loop + } +} +``` + +The same `main()` entry point handles both DAG parsing and task execution — the first message received (`DagFileParseRequest` or `StartupDetails`) determines the mode. + +**Java SDK Java-side Supervisor (Alternative Execution Path):** + +The Java SDK also provides `Supervisor.kt` for execution contexts where there is no Python process (e.g., the Edge Worker).
In this path, the Supervisor terminates the protocol directly instead of bridging: + +``` +Supervisor.run(request) + │ + ├── Create TCP comm + logs servers + ├── Spawn Java bundle process with --comm/--logs + ├── Accept connections + ├── HTTP PATCH task → running state + ├── Send StartupDetails to bundle via comm socket + │ + └── serveTaskSdkRequests() loop: + Read frame from bundle + ├── GetConnection → HTTP GET /connections/{id} → send response + ├── GetVariable → HTTP GET /variables/{key} → send response + ├── GetXCom → HTTP GET /xcom/... → send response + ├── SetXCom → HTTP POST /xcom/... → send response + └── SucceedTask/TaskState → HTTP PATCH terminal state → exit loop +``` + +The bundle process behaves identically in both paths — it is unaware of whether its comm channel leads to a Python bridge or a Java Supervisor. This is the core design invariant of the Java SDK. + +## Consequences + +- Task execution for any language reuses the same coordinator + bridge pattern as DAG parsing, keeping the extension surface small. +- The multi-round protocol (GetConnection, GetVariable, etc.) means the language runtime has full access to Airflow services without reimplementing them — they stay in Python. +- The synchronous request/response model is simple for language SDK authors but adds a round-trip per service call. +- The Java-side Supervisor (`Supervisor.kt`) provides an alternative execution path for environments without Python, but requires the Java SDK to implement HTTP calls to the Execution API directly. +- Task authors interact with a simple `Client` interface, completely abstracted from the underlying socket protocol. 
diff --git a/java-sdk/adr/0004-pure-java-dags.md b/java-sdk/adr/0004-pure-java-dags.md new file mode 100644 index 0000000000000..a3c42672c24cf --- /dev/null +++ b/java-sdk/adr/0004-pure-java-dags.md @@ -0,0 +1,228 @@ + + +# ADR-0004: Pure Java DAGs — Build-Time Packaging and Code Visibility + +## Status + +Accepted + +## Context + +[ADR-0001](0001-java-sdk-airflow-integration.md) introduces two ways to integrate non-Python tasks: `@task.stub` (mixed Python+Java DAGs) and pure Java DAGs (entire DAG in Java via `DagBundle`). [ADR-0002](0002-dag-parsing.md) and [ADR-0003](0003-workload-execution.md) describe the coordinator infrastructure for DAG parsing and task execution respectively. + +This ADR focuses on the Java-SDK-specific concerns that make pure Java DAGs work end-to-end — build-time metadata generation, source code packaging for UI visibility, and JAR manifest conventions — rather than the shared coordinator infrastructure already covered in those ADRs. + +The central challenge is that Airflow Core expects to read DAG metadata and source code from files on disk or from the metadata DB. A JAR is an opaque binary — Airflow cannot `open()` it and read Python source. The Java SDK must bridge this gap at build time by embedding machine-readable metadata and human-readable source into the JAR itself. 
+ +## Decision + +### JAR Manifest Conventions + +The JAR manifest (`META-INF/MANIFEST.MF`) carries three attributes — the standard `Main-Class` plus two SDK-specific ones — that Airflow and the Java SDK use to bootstrap a bundle: + +| Attribute | Example Value | Purpose | +|---|---|---| +| `Main-Class` | `org.apache.airflow.example.JavaExample` | Standard Java attribute; the coordinator uses it to launch the JVM | +| `Airflow-Java-SDK-Metadata` | `airflow-metadata.yaml` | Points to the embedded metadata file (dag IDs, task IDs) | +| `Airflow-Java-SDK-Dag-Code` | `JavaExample.java` | Points to the embedded source file for Airflow UI display | + +These attributes are set in the Gradle build (see [Build-Time Packaging](#build-time-packaging-gradle) below). The Python-side coordinator reads `Main-Class` to construct the launch command; `BundleScanner` reads `Airflow-Java-SDK-Metadata` to discover DAG IDs without launching the JVM. + +### Build-Time Metadata: `airflow-metadata.yaml` + +At build time, the SDK runs `BundleInspector` — a build-time utility that reflectively instantiates the user's `DagBundle` class, calls `getDags()`, and writes a YAML file listing every DAG ID and its task IDs: + +```yaml +dags: + java_example: + tasks: + - extract + - transform + - load +``` + +This file is embedded in the JAR root and referenced by the `Airflow-Java-SDK-Metadata` manifest attribute. + +**Why build-time, not runtime?** The metadata must be available before the JVM starts. `BundleScanner` reads it from the JAR to discover which DAG IDs a bundle contains — this is used for `@task.stub` routing (mapping a `dag_id` to the correct bundle's classpath) without paying JVM startup cost. For pure Java DAGs, the coordinator already knows the bundle path, but the metadata is still useful for validation and tooling.
+ +**`BundleInspector`:** + +```kotlin +object BundleInspector { + @JvmStatic + fun main(args: Array<String>) { + val className = args[0] + val outputPath = args[1] + val clazz = Class.forName(className) + val instance = clazz.getDeclaredConstructor().newInstance() as? DagBundle + ?: error("$className does not implement DagBundle") + val dags = instance.getDags() + File(outputPath).apply { parentFile.mkdirs() }.writeText(toYaml(dags)) + } + + internal fun toYaml(dags: List<Dag>): String = buildString { + appendLine("dags:") + for (dag in dags) { + appendLine(" ${dag.dagId}:") + appendLine(" tasks:") + for (taskId in dag.tasks.keys) { + appendLine(" - $taskId") + } + } + } +} +``` + +### Source Code Packaging for UI Visibility + +Airflow stores DAG source code in the `dag_code` table and displays it in the web UI. For Python DAGs this is trivial — `DagCode.write_code()` reads the `.py` file from disk. For a JAR, the raw bytecode is not human-readable. + +The solution: pack the original `.java` source file into the JAR at build time. The `Airflow-Java-SDK-Dag-Code` manifest attribute tells the coordinator which file to extract. + +On the Python side, `get_code_from_file()` on the coordinator: + +1. Opens the JAR as a ZIP +2. Reads the `Airflow-Java-SDK-Dag-Code` attribute from the manifest +3. Extracts and returns the raw `.java` source + +This lets Airflow's existing `DagCode` infrastructure store and display Java source code with no changes to Airflow Core. + +### Build-Time Packaging (Gradle) + +The `example/build.gradle.kts` shows the complete packaging pattern: + +```kotlin +val bundleMainClass = application.mainClass.get() +val metadataFileName = "airflow-metadata.yaml" +val metadataOutputDir = layout.buildDirectory.dir("airflow-metadata") +val dagCodeSourcePath = bundleMainClass.replace('.', '/') + ".java" +val dagCodeFileName = bundleMainClass.substringAfterLast('.') + ".java" + +// 1.
Run BundleInspector at build time (after compilation) to generate metadata +val inspectBundle = tasks.register<JavaExec>("inspectBundle") { + dependsOn("classes") + classpath = sourceSets.main.get().runtimeClasspath + mainClass.set("org.apache.airflow.sdk.BundleInspector") + args = listOf(bundleMainClass, metadataOutputDir.get().file(metadataFileName).asFile.absolutePath) +} + +// 2. Pack metadata + source into the JAR +tasks.withType<Jar> { + dependsOn(inspectBundle) + from(metadataOutputDir) // airflow-metadata.yaml + from("src/java/$dagCodeSourcePath") // raw .java source file + manifest { + attributes( + "Main-Class" to bundleMainClass, + "Airflow-Java-SDK-Version" to project.version, + "Airflow-Java-SDK-Metadata" to metadataFileName, + "Airflow-Java-SDK-Dag-Code" to dagCodeFileName, + ) + } +} +``` + +The resulting JAR contains: + +``` +example.jar +├── META-INF/MANIFEST.MF (Main-Class, SDK attributes) +├── airflow-metadata.yaml (dag IDs + task IDs) +├── JavaExample.java (raw source for UI display) +├── org/apache/airflow/example/ +│ ├── JavaExample.class (compiled bundle entry point) +│ ├── JavaExample$Extract.class +│ ├── JavaExample$Transform.class +│ └── JavaExample$Load.class +└── ... (SDK + dependency classes) +``` + +### `BundleScanner` — Runtime Bundle Discovery + +`BundleScanner` reads JAR manifests at runtime to discover bundles without launching the JVM. This is used by the `@task.stub` path to resolve which bundle contains a given `dag_id`.
+ +```kotlin +data class ResolvedBundle( + val mainClass: String, // From Main-Class manifest attribute + val classpath: String, // All JARs in bundle directory, colon-separated +) + +fun scanBundles(bundlesDir: Path): Map<String, ResolvedBundle> +``` + +It supports two directory layouts: + +- **Nested**: each subdirectory of `bundlesDir` is a bundle home (e.g., `bundles/my-app/lib/*.jar`) +- **Flat**: `bundlesDir` itself contains the JARs (e.g., `bundles/*.jar`) + +For each JAR, it reads the `Airflow-Java-SDK-Metadata` manifest attribute, extracts the referenced YAML, parses DAG IDs, and returns a mapping from `dag_id` to `ResolvedBundle`. + +### The DagBundle Authoring API + +Bundle authors implement `DagBundle` to define their DAGs: + +```java +public class JavaExample implements DagBundle { + + public static class Extract implements Task { + public void execute(Client client) throws Exception { + var connection = client.getConnection("test_http"); + client.setXCom(new Date().getTime()); + } + } + + public static class Transform implements Task { + public void execute(Client client) { + var extract_xcom = client.getXCom("extract"); + client.setXCom(new Date().getTime()); + } + } + + @Override + public List<Dag> getDags() { + var dag = new Dag("java_example", null, "@daily"); + dag.addTask("extract", Extract.class, List.of()); + dag.addTask("transform", Transform.class, List.of("extract")); + return List.of(dag); + } + + public static void main(String[] args) { + var example = new JavaExample(); + var bundle = new Bundle( + JavaExample.class.getPackage().getImplementationVersion(), + example.getDags() + ); + Server.create(args).serve(bundle); + } +} +``` + +The `main()` method is the JVM entry point that the coordinator launches. It wires the `DagBundle` to the SDK's TCP communication layer (`Server` → `CoordinatorComm`), which handles DAG parsing requests and task execution commands as described in [ADR-0002](0002-dag-parsing.md) and [ADR-0003](0003-workload-execution.md).
+ +> **Note:** The current `DagBundle` interface is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. + +## Consequences + +- JAR bundles are self-contained: metadata, source, and compiled code are all in one artifact, simplifying deployment (copy one directory of JARs). +- Build-time metadata generation means DAG IDs can be discovered without JVM startup — important for `BundleScanner` and tooling. +- Source code packaging enables Airflow UI display with no changes to Airflow Core's `DagCode` infrastructure. +- The manifest convention (`Airflow-Java-SDK-*` attributes) is extensible — future attributes can carry additional metadata without breaking existing tooling. +- The build-time `BundleInspector` step adds a compile-time dependency on the SDK and requires the `DagBundle` class to be instantiable without side effects (no I/O, no connections in the constructor). +- Bundle authors must follow the Gradle packaging pattern (or replicate it in Maven/other build tools) — this is SDK-specific boilerplate that doesn't exist for Python DAGs. 
From 26e6fa9e933b41f38111ca69d69a8f403c149360 Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Tue, 28 Apr 2026 14:08:16 +0800 Subject: [PATCH 04/16] Add lisense header --- docs/spelling_wordlist.txt | 1 + java-sdk/.editorconfig | 18 +++ java-sdk/build.gradle.kts | 19 +++ java-sdk/example/build.gradle.kts | 19 +++ .../apache/airflow/example/JavaExample.java | 19 +++ java-sdk/sdk/build.gradle.kts | 19 +++ .../kotlin/org/apache/airflow/sdk/Bundle.kt | 19 +++ .../org/apache/airflow/sdk/BundleInspector.kt | 19 +++ .../org/apache/airflow/sdk/BundleScanner.kt | 19 +++ .../kotlin/org/apache/airflow/sdk/Client.kt | 19 +++ .../kotlin/org/apache/airflow/sdk/Config.kt | 19 +++ .../org/apache/airflow/sdk/Connection.kt | 19 +++ .../main/kotlin/org/apache/airflow/sdk/Dag.kt | 19 +++ .../org/apache/airflow/sdk/DagBundle.kt | 19 +++ .../kotlin/org/apache/airflow/sdk/Server.kt | 19 +++ .../kotlin/org/apache/airflow/sdk/Task.kt | 19 +++ .../apache/airflow/sdk/execution/Client.kt | 19 +++ .../org/apache/airflow/sdk/execution/Comms.kt | 19 +++ .../apache/airflow/sdk/execution/DagParser.kt | 19 +++ .../apache/airflow/sdk/execution/JarUtils.kt | 19 +++ .../apache/airflow/sdk/execution/Logger.kt | 19 +++ .../apache/airflow/sdk/execution/MsgPack.kt | 19 +++ .../org/apache/airflow/sdk/execution/Serde.kt | 19 +++ .../airflow/sdk/execution/Supervisor.kt | 19 +++ .../airflow/sdk/execution/TaskRunner.kt | 19 +++ .../airflow/sdk/execution/TaskSdkFrames.kt | 19 +++ .../apache/airflow/sdk/BundleScannerTest.kt | 19 +++ .../org/apache/airflow/sdk/BundleTest.kt | 19 +++ .../org/apache/airflow/sdk/ConfigTest.kt | 19 +++ .../apache/airflow/sdk/CoordinatorCommTest.kt | 19 +++ .../apache/airflow/sdk/execution/CommsTest.kt | 19 +++ .../airflow/sdk/execution/DagParserTest.kt | 19 +++ .../SerializationCompatibilityTest.kt | 19 +++ .../airflow/sdk/execution/TaskRunnerTest.kt | 19 +++ .../sdk/execution/TestTaskSubprocess.kt | 117 ++++++++++++++++++ 35 files changed, 744 insertions(+) create mode 100644 
java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 021ae94b2f4fe..c744b684f9b65 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1129,6 +1129,7 @@ opsgenie Optimise optimise optimizationObjective +OptIn optionality ora oracledb diff --git a/java-sdk/.editorconfig b/java-sdk/.editorconfig index 37bdc0ac6ea59..1b89a6e999824 100644 --- a/java-sdk/.editorconfig +++ b/java-sdk/.editorconfig @@ -1,3 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + root = true [*] diff --git a/java-sdk/build.gradle.kts b/java-sdk/build.gradle.kts index a9fb8d993533b..b2b367fc4fd91 100644 --- a/java-sdk/build.gradle.kts +++ b/java-sdk/build.gradle.kts @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + import com.diffplug.gradle.spotless.SpotlessExtension import org.jetbrains.kotlin.gradle.dsl.JvmTarget diff --git a/java-sdk/example/build.gradle.kts b/java-sdk/example/build.gradle.kts index e36ff5bd68199..a0bb2d4f8c6b8 100644 --- a/java-sdk/example/build.gradle.kts +++ b/java-sdk/example/build.gradle.kts @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + plugins { application } diff --git a/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java b/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java index 1a681dd2c9391..d6a82c799ef62 100644 --- a/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java +++ b/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.example; import java.util.Date; diff --git a/java-sdk/sdk/build.gradle.kts b/java-sdk/sdk/build.gradle.kts index 08d0b9c9639ef..25acd07f4cde1 100644 --- a/java-sdk/sdk/build.gradle.kts +++ b/java-sdk/sdk/build.gradle.kts @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + import org.jetbrains.kotlin.gradle.tasks.KotlinCompile import org.jlleitschuh.gradle.ktlint.tasks.KtLintCheckTask import java.time.ZonedDateTime diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt index 56a4f0f65343c..514ffc788b2e6 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk class Bundle( diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt index 2da6b34c19829..186daa5b8fabc 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk import java.io.File diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt index 32fc591f7003c..e73698bd9f6cf 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleScanner.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk import com.fasterxml.jackson.databind.ObjectMapper diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt index 582496ff0e2e0..d6507b8345d55 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Client.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk import org.apache.airflow.sdk.execution.Client diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt index 1a6c69b2b901f..ad2c48dfd41b6 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Config.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk import com.fasterxml.jackson.databind.ObjectMapper diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt index 98a7dab39bded..16f3b72d0e198 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Connection.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk data class Connection( diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt index 0df3f1251f105..c451e0c7ebcc0 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk import java.time.Duration diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt index 9890bd20b6076..0b266b83c7571 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk /** diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt index bc72bfa3296cf..741442a7f1a4c 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Server.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk import com.xenomachina.argparser.ArgParser diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt index 08ca52e2ae5c1..e65523e2c1dc6 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk import kotlin.Throws diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt index 7388b308e6d3c..f22af53868890 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Client.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import kotlinx.coroutines.runBlocking diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt index acc0e5bb4154f..0b5e50c4523c6 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Comms.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import com.fasterxml.jackson.annotation.JsonProperty diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt index 4cba6fd775365..a8f6eb2713a2e 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/DagParser.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.Bundle diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt index 1b62162ab0ee3..7536f0d49f7f8 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/JarUtils.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import java.nio.file.Files diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt index 1378531673dbb..f769ed3325324 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Logger.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import io.ktor.utils.io.ByteWriteChannel diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt index f951248bf6952..fb9a28542fcdc 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/MsgPack.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk.execution import com.fasterxml.jackson.core.JsonParser diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt index 4f44b42b9bd6b..b62150bbdb40d 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.Dag diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt index 190eb1706b7e2..393e37c8cf005 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import kotlinx.coroutines.Dispatchers diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt index ed61d57149923..8223276e1fb5d 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.Bundle diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt index 76e666dbb23f5..010c29a40f1d6 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskSdkFrames.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import com.fasterxml.jackson.databind.DeserializationFeature diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt index 8bdacd8ce4e79..1d84487e6a04d 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleScannerTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk import org.junit.jupiter.api.Assertions.assertEquals diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt index 785394185e7cd..9671972a4fa55 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk import org.junit.jupiter.api.Assertions diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt index 6ccdeef500e42..5cd9c33d3835d 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/ConfigTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk import org.junit.jupiter.api.Assertions.assertEquals diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt index d53f8f09c795d..455de10cc68e0 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/CoordinatorCommTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk import io.ktor.utils.io.ByteChannel diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt index fcca3ebf45f2b..3b91662751a5e 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/CommsTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.byteArrayFromHexString diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt index 8781e437fe268..5e2edb549c45c 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/DagParserTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.Bundle diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt index a2a6118777793..41edbe3f2aa35 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import com.fasterxml.jackson.databind.ObjectMapper diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt index 90593f136eb45..024d2d132f97d 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.Bundle diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt new file mode 100644 index 0000000000000..5b5f441dba82c --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.airflow.sdk.execution + +import java.io.InputStream +import java.io.OutputStream +import java.net.Socket + +/** + * Family of minimal subprocesses that simulate a Java task bundle process for integration testing + * of [Supervisor]. Each object has a `main` method that can be spawned by [Supervisor.run]. + * + * Protocol: connect to comm + logs sockets (via `--comm` / `--logs` CLI args), read the + * [StartupDetails] frame from the supervisor, perform a behavior-specific action, then exit.
+ */ +private fun connectAndProcess( + args: Array, + onFrame: (InputStream, OutputStream, IncomingFrame) -> Unit, +) { + val commAddr = args.first { it.startsWith("--comm=") }.removePrefix("--comm=") + val logsAddr = args.first { it.startsWith("--logs=") }.removePrefix("--logs=") + val (commHost, commPort) = commAddr.split(":") + val (logsHost, logsPort) = logsAddr.split(":") + + val commSocket = Socket(commHost, commPort.toInt()) + val logsSocket = Socket(logsHost, logsPort.toInt()) + try { + val commIn = commSocket.getInputStream() + val commOut = commSocket.getOutputStream() + val frame = TaskSdkFrames.readFrame(commIn, TaskSdkFrames.toTaskTypes) + onFrame(commIn, commOut, frame) + } finally { + commSocket.close() + logsSocket.close() + } +} + +/** Reads StartupDetails and immediately sends [SucceedTask]. */ +object TestSucceedSubprocess { + @JvmStatic + fun main(args: Array) = + connectAndProcess(args) { _, output, frame -> + TaskSdkFrames.writeRequest(output, frame.id, SucceedTask()) + } +} + +/** Reads StartupDetails and sends [TaskState] with state=failed. */ +object TestFailSubprocess { + @JvmStatic + fun main(args: Array) = + connectAndProcess(args) { _, output, frame -> + TaskSdkFrames.writeRequest(output, frame.id, TaskState(state = "failed")) + } +} + +/** Sends a [GetVariable] request, reads the response, then sends [SucceedTask]. */ +object TestGetVariableSubprocess { + @JvmStatic + fun main(args: Array) = + connectAndProcess(args) { input, output, frame -> + TaskSdkFrames.writeRequest(output, 10, GetVariable("test_var")) + TaskSdkFrames.readFrame(input, TaskSdkFrames.toBundleProcessTypes) + TaskSdkFrames.writeRequest(output, frame.id, SucceedTask()) + } +} + +/** Sends a [GetConnection] request, reads the response, then sends [SucceedTask]. 
*/ +object TestGetConnectionSubprocess { + @JvmStatic + fun main(args: Array) = + connectAndProcess(args) { input, output, frame -> + TaskSdkFrames.writeRequest(output, 10, GetConnection("test_conn")) + TaskSdkFrames.readFrame(input, TaskSdkFrames.toBundleProcessTypes) + TaskSdkFrames.writeRequest(output, frame.id, SucceedTask()) + } +} + +/** Writes a message to stdout before succeeding, to verify log collection. */ +object TestStdoutSubprocess { + @JvmStatic + fun main(args: Array) { + println("stdout-line-1") + println("stdout-line-2") + System.err.println("stderr-line-1") + connectAndProcess(args) { _, output, frame -> + TaskSdkFrames.writeRequest(output, frame.id, SucceedTask()) + } + } +} + +/** Sends [SucceedTask] but exits with non-zero code — tests exit-code override logic. */ +object TestSucceedThenCrashSubprocess { + @JvmStatic + fun main(args: Array) { + connectAndProcess(args) { _, output, frame -> + TaskSdkFrames.writeRequest(output, frame.id, SucceedTask()) + } + // Force non-zero exit after the protocol completes cleanly. 
+ Runtime.getRuntime().halt(42) + } +} From b47360d50d99712e6a8f44d6be845745f8407e29 Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Tue, 28 Apr 2026 14:11:00 +0800 Subject: [PATCH 05/16] Add test coverage for process management --- java-sdk/sdk/build.gradle.kts | 1 + .../airflow/sdk/execution/SupervisorTest.kt | 455 ++++++++++++++++++ .../sdk/execution/TestTaskSubprocess.kt | 2 +- 3 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt diff --git a/java-sdk/sdk/build.gradle.kts b/java-sdk/sdk/build.gradle.kts index 25acd07f4cde1..3e41ea0464109 100644 --- a/java-sdk/sdk/build.gradle.kts +++ b/java-sdk/sdk/build.gradle.kts @@ -59,6 +59,7 @@ dependencies { implementation("org.msgpack:jackson-dataformat-msgpack:0.9.11") testImplementation(kotlin("test")) + testImplementation("com.squareup.okhttp3:mockwebserver:4.12.0") } openApiGenerate { diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt new file mode 100644 index 0000000000000..c8b69da21c527 --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt @@ -0,0 +1,455 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.airflow.sdk.execution + +import kotlinx.coroutines.runBlocking +import okhttp3.mockwebserver.Dispatcher +import okhttp3.mockwebserver.MockResponse +import okhttp3.mockwebserver.MockWebServer +import okhttp3.mockwebserver.RecordedRequest +import org.apache.airflow.sdk.execution.api.model.TaskInstanceState as ExecutionTaskInstanceState +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test +import java.io.ByteArrayInputStream +import java.util.UUID +import java.util.concurrent.CopyOnWriteArrayList + +class SupervisorTest { + // streamLines() tests + + @Test + @DisplayName("streamLines: empty stream produces no callbacks") + fun streamLinesEmptyStream() = + runBlocking { + val lines = mutableListOf() + Supervisor.streamLines(ByteArrayInputStream(ByteArray(0))) { lines.add(it) } + assertTrue(lines.isEmpty()) + } + + @Test + @DisplayName("streamLines: single line") + fun streamLinesSingleLine() = + runBlocking { + val lines = mutableListOf() + Supervisor.streamLines(ByteArrayInputStream("hello\n".toByteArray())) { lines.add(it) } + assertEquals(listOf("hello"), lines) + } + + @Test + @DisplayName("streamLines: multiple lines") + fun streamLinesMultipleLines() = + runBlocking { + val input = "line1\nline2\nline3\n".toByteArray() + val lines = mutableListOf() + Supervisor.streamLines(ByteArrayInputStream(input)) { lines.add(it) } + assertEquals(listOf("line1", "line2", "line3"), lines) + } + + @Test + @DisplayName("streamLines: preserves blank lines between content") + fun streamLinesWithBlankLines() = + runBlocking { + val input = "first\n\nsecond\n".toByteArray() + val lines = 
mutableListOf() + Supervisor.streamLines(ByteArrayInputStream(input)) { lines.add(it) } + assertEquals(listOf("first", "", "second"), lines) + } + + @Test + @DisplayName("streamLines: handles line without trailing newline") + fun streamLinesNoTrailingNewline() = + runBlocking { + val lines = mutableListOf() + Supervisor.streamLines(ByteArrayInputStream("no-newline".toByteArray())) { lines.add(it) } + assertEquals(listOf("no-newline"), lines) + } + + @Test + @DisplayName("streamLines: handles large number of lines") + fun streamLinesManyLines() = + runBlocking { + val count = 10_000 + val input = (1..count).joinToString("\n") { "line-$it" }.toByteArray() + val lines = CopyOnWriteArrayList() + Supervisor.streamLines(ByteArrayInputStream(input)) { lines.add(it) } + assertEquals(count, lines.size) + assertEquals("line-1", lines.first()) + assertEquals("line-$count", lines.last()) + } + + // Data class tests + + @Test + @DisplayName("SupervisorTaskInstance: all fields populated") + fun supervisorTaskInstanceAllFields() { + val id = UUID.randomUUID() + val dagVersionId = UUID.randomUUID() + val carrier = mapOf("trace" to "abc") + val ti = + SupervisorTaskInstance( + id = id, + taskId = "my_task", + dagId = "my_dag", + runId = "run_1", + tryNumber = 2, + dagVersionId = dagVersionId, + mapIndex = 5, + contextCarrier = carrier, + ) + assertEquals(id, ti.id) + assertEquals("my_task", ti.taskId) + assertEquals("my_dag", ti.dagId) + assertEquals("run_1", ti.runId) + assertEquals(2, ti.tryNumber) + assertEquals(dagVersionId, ti.dagVersionId) + assertEquals(5, ti.mapIndex) + assertEquals(carrier, ti.contextCarrier) + } + + @Test + @DisplayName("SupervisorTaskInstance: null optional fields") + fun supervisorTaskInstanceNullOptionals() { + val ti = + SupervisorTaskInstance( + id = UUID.randomUUID(), + taskId = "t", + dagId = "d", + runId = "r", + tryNumber = 1, + dagVersionId = UUID.randomUUID(), + mapIndex = null, + ) + assertEquals(null, ti.mapIndex) + assertEquals(null, 
ti.contextCarrier) + } + + @Test + @DisplayName("SupervisorTaskInstance: data class equality") + fun supervisorTaskInstanceEquality() { + val id = UUID.randomUUID() + val dvId = UUID.randomUUID() + val a = SupervisorTaskInstance(id, "t", "d", "r", 1, dvId, null) + val b = SupervisorTaskInstance(id, "t", "d", "r", 1, dvId, null) + assertEquals(a, b) + assertEquals(a.hashCode(), b.hashCode()) + } + + @Test + @DisplayName("SupervisorBundleInfo: with and without version") + fun supervisorBundleInfo() { + val withVersion = SupervisorBundleInfo("my-bundle", "v2") + assertEquals("my-bundle", withVersion.name) + assertEquals("v2", withVersion.version) + + val withoutVersion = SupervisorBundleInfo("my-bundle", null) + assertEquals(null, withoutVersion.version) + } + + @Test + @DisplayName("SupervisorResult: success and failure states") + fun supervisorResult() { + val success = SupervisorResult(ExecutionTaskInstanceState.SUCCESS, 0) + assertEquals(ExecutionTaskInstanceState.SUCCESS, success.finalState) + assertEquals(0, success.exitCode) + + val failure = SupervisorResult(ExecutionTaskInstanceState.FAILED, 1) + assertEquals(ExecutionTaskInstanceState.FAILED, failure.finalState) + assertEquals(1, failure.exitCode) + } + + @Test + @DisplayName("SupervisorRequest: default values") + fun supervisorRequestDefaults() { + val request = + SupervisorRequest( + mainClass = "com.example.Main", + classpath = "/app/lib/*", + executionApiBaseUrl = "http://localhost:8080/execution/", + token = "test-token", + workerName = "worker-1", + userName = "airflow", + dagRelPath = "dags/my_dag.jar", + bundleInfo = SupervisorBundleInfo("bundle", "1"), + taskInstance = + SupervisorTaskInstance( + UUID.randomUUID(), + "task", + "dag", + "run", + 1, + UUID.randomUUID(), + null, + ), + ) + assertEquals("", request.sentryIntegration) + } + + // Integration tests: Supervisor.run() with real subprocess + MockWebServer + + private lateinit var mockServer: MockWebServer + + @BeforeEach + fun setUp() { + 
mockServer = MockWebServer() + } + + @AfterEach + fun tearDown() { + mockServer.shutdown() + } + + /** Minimal valid JSON for TIRunContext that Jackson can deserialize with unknown-props disabled. */ + private val tiRunContextJson = + """ + { + "dag_run": { + "dag_id": "test_dag", + "run_id": "run_1", + "logical_date": "2026-01-01T00:00:00Z", + "data_interval_start": "2026-01-01T00:00:00Z", + "data_interval_end": "2026-01-01T01:00:00Z", + "start_date": "2026-01-01T00:00:00Z", + "run_after": "2026-01-01T00:00:00Z", + "run_type": "manual" + }, + "max_tries": 0, + "should_retry": false + } + """.trimIndent() + + private fun request(mainClass: String = TestSucceedSubprocess::class.java.name): SupervisorRequest { + val classpath = System.getProperty("java.class.path") + return SupervisorRequest( + mainClass = mainClass, + classpath = classpath, + executionApiBaseUrl = mockServer.url("/execution/").toString(), + token = "test-jwt-token", + workerName = "test-worker", + userName = "testuser", + dagRelPath = "dags/test.jar", + bundleInfo = SupervisorBundleInfo("test-bundle", "1"), + taskInstance = + SupervisorTaskInstance( + id = UUID.randomUUID(), + taskId = "my_task", + dagId = "test_dag", + runId = "run_1", + tryNumber = 1, + dagVersionId = UUID.randomUUID(), + mapIndex = null, + ), + sentryIntegration = "", + onLogLine = {}, + ) + } + + /** + * A dispatcher that returns a TIRunContext for the /run endpoint and 200 OK for state updates. + * Also handles variable/connection/xcom API calls for the more complex test scenarios. 
+ */ + private fun apiDispatcher(): Dispatcher = + object : Dispatcher() { + override fun dispatch(request: RecordedRequest): MockResponse { + val path = request.path ?: return MockResponse().setResponseCode(404) + return when { + // tiRun: PATCH .../task-instances/{id}/run + path.contains("/run") && request.method == "PATCH" -> + MockResponse() + .setResponseCode(200) + .setHeader("Content-Type", "application/json") + .setBody(tiRunContextJson) + + // succeed/finish: PATCH .../task-instances/{id}/state + path.contains("/state") && request.method == "PATCH" -> + MockResponse().setResponseCode(200) + + // getVariable: GET .../variables/{key} + path.contains("/variables/") && request.method == "GET" -> + MockResponse() + .setResponseCode(200) + .setHeader("Content-Type", "application/json") + .setBody("""{"key": "test_var", "value": "hello"}""") + + // getConnection: GET .../connections/{id} + path.contains("/connections/") && request.method == "GET" -> + MockResponse() + .setResponseCode(200) + .setHeader("Content-Type", "application/json") + .setBody("""{"conn_id": "test_conn", "conn_type": "http"}""") + + // setXcom: POST .../xcoms/... 
+ path.contains("/xcoms/") && request.method == "POST" -> + MockResponse() + .setResponseCode(200) + .setHeader("Content-Type", "application/json") + .setBody("{}") + + else -> + MockResponse().setResponseCode(404).setBody("Not found: $path") + } + } + } + + @Test + @DisplayName("run: successful task execution returns SUCCESS with exit code 0") + fun runSuccessfulTask() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + val result = Supervisor.run(request()) + + assertEquals(ExecutionTaskInstanceState.SUCCESS, result.finalState) + assertEquals(0, result.exitCode) + } + + @Test + @DisplayName("run: task reporting failed state returns FAILED") + fun runFailedTask() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + val result = Supervisor.run(request(mainClass = TestFailSubprocess::class.java.name)) + + assertEquals(ExecutionTaskInstanceState.FAILED, result.finalState) + assertEquals(0, result.exitCode) // process exits cleanly, but reports failed state + } + + @Test + @DisplayName("run: task requesting a variable before succeeding") + fun runTaskWithGetVariable() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + val result = Supervisor.run(request(mainClass = TestGetVariableSubprocess::class.java.name)) + + assertEquals(ExecutionTaskInstanceState.SUCCESS, result.finalState) + assertEquals(0, result.exitCode) + + // Verify the variable request was made to the mock server. 
+ val requests = (1..mockServer.requestCount).map { mockServer.takeRequest() } + assertTrue(requests.any { it.path?.contains("/variables/") == true }) + } + + @Test + @DisplayName("run: task requesting a connection before succeeding") + fun runTaskWithGetConnection() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + val result = Supervisor.run(request(mainClass = TestGetConnectionSubprocess::class.java.name)) + + assertEquals(ExecutionTaskInstanceState.SUCCESS, result.finalState) + assertEquals(0, result.exitCode) + + val requests = (1..mockServer.requestCount).map { mockServer.takeRequest() } + assertTrue(requests.any { it.path?.contains("/connections/") == true }) + } + + @Test + @DisplayName("run: reports task as running to execution API with correct payload") + fun runReportsRunningState() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + Supervisor.run(request()) + + val requests = (1..mockServer.requestCount).map { mockServer.takeRequest() } + val runRequest = requests.first { it.path?.contains("/run") == true && it.method == "PATCH" } + assertEquals("PATCH", runRequest.method) + val body = runRequest.body.readUtf8() + assertTrue(body.contains("test-worker"), "Should contain hostname") + assertTrue(body.contains("testuser"), "Should contain unix name") + } + + @Test + @DisplayName("run: reports terminal state to execution API") + fun runReportsTerminalState() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + Supervisor.run(request()) + + val requests = (1..mockServer.requestCount).map { mockServer.takeRequest() } + val stateRequest = requests.first { it.path?.contains("/state") == true } + assertEquals("PATCH", stateRequest.method) + } + + @Test + @DisplayName("run: sends bearer token in all API requests") + fun runSendsBearerToken() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + Supervisor.run(request()) + + val 
requests = (1..mockServer.requestCount).map { mockServer.takeRequest() } + for (req in requests) { + val auth = req.getHeader("Authorization") + assertNotNull(auth, "Authorization header should be present on ${req.path}") + assertTrue(auth!!.startsWith("Bearer "), "Should use Bearer auth on ${req.path}") + } + } + + @Test + @DisplayName("run: collects stdout and stderr from subprocess") + fun runCollectsLogLines() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + val logLines = CopyOnWriteArrayList() + val req = + request(mainClass = TestStdoutSubprocess::class.java.name).copy( + onLogLine = { logLines.add(it) }, + ) + + Supervisor.run(req) + + assertTrue(logLines.any { it == "stdout-line-1" }, "Should capture stdout: $logLines") + assertTrue(logLines.any { it == "stdout-line-2" }, "Should capture stdout: $logLines") + assertTrue(logLines.any { it == "stderr-line-1" }, "Should capture stderr: $logLines") + } + + @Test + @DisplayName("run: non-zero exit code overrides final state to FAILED") + fun runNonZeroExitCodeOverridesState() = + runBlocking { + mockServer.dispatcher = apiDispatcher() + mockServer.start() + + // TestSucceedThenCrashSubprocess sends SucceedTask (which would normally yield SUCCESS) + // but then exits with code 42. Supervisor should override the final state to FAILED. 
+ val result = Supervisor.run(request(mainClass = TestSucceedThenCrashSubprocess::class.java.name)) + + assertEquals(ExecutionTaskInstanceState.FAILED, result.finalState) + assertEquals(42, result.exitCode) + } +} diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt index 5b5f441dba82c..697f5da39a95a 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt @@ -10,7 +10,7 @@ g/* * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, - * software distributed under the License is dis etributed on an + * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations From aa124668ae5041ba51f7058b0212ceb5b25d678b Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Tue, 28 Apr 2026 18:02:58 +0800 Subject: [PATCH 06/16] CI: Fix yamllint check --- java-sdk/validation/serialization/test_dags.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java-sdk/validation/serialization/test_dags.yaml b/java-sdk/validation/serialization/test_dags.yaml index f2eb014b60721..99d3b6a8e1374 100644 --- a/java-sdk/validation/serialization/test_dags.yaml +++ b/java-sdk/validation/serialization/test_dags.yaml @@ -26,7 +26,7 @@ # dagrun_timeout_seconds — number of seconds, parsed to timedelta / Duration # tags — list of strings, converted to set on both sides # access_control — nested map; innermost lists become sets - +--- test_cases: # ---- schedule variants ------------------------------------------------ - name: "minimal_dag" From 0bbd499ddd5015cf4c55ea8bfa34f1bf0a5106e9 Mon Sep 17 00:00:00 2001 From: 
Tzu-ping Chung Date: Mon, 4 May 2026 16:10:14 +0800 Subject: [PATCH 07/16] Fix stray character --- .../org/apache/airflow/sdk/execution/TestTaskSubprocess.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt index 697f5da39a95a..0bd392279fc8c 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TestTaskSubprocess.kt @@ -1,4 +1,4 @@ -g/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information From 728813f9173727b40a71e2003b6ac4136125ac0c Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Wed, 6 May 2026 07:41:08 +0800 Subject: [PATCH 08/16] Introduce Bundle and Dag Builder patterns (#1572) The existing DagBundle class is renamed to BundleBuilder with a new 'build()' method to abstract away bundle creation details from the user. The dag declaration in the example bundle is moved to a separate class to better mirror a real-world use case. In practice, a bundle would contain more than one dag, and the previous example is unclear about how the user should structure the code to do that. The new example structure uses a builder pattern to create one dag from a class declaration, while the bundle builder implementation aggregates all dags and acts as the executable's entry point. 
--- .../adr/0001-java-sdk-airflow-integration.md | 12 ++-- java-sdk/adr/0002-dag-parsing.md | 14 ++-- java-sdk/adr/0003-workload-execution.md | 12 +--- java-sdk/adr/0004-pure-java-dags.md | 67 ++++++++++--------- java-sdk/example/build.gradle.kts | 2 +- .../airflow/example/ExampleBundleBuilder.java | 18 +++++ ...vaExample.java => JavaExampleBuilder.java} | 25 +++---- .../kotlin/org/apache/airflow/sdk/Bundle.kt | 13 ++++ .../org/apache/airflow/sdk/BundleInspector.kt | 8 +-- .../org/apache/airflow/sdk/DagBundle.kt | 31 --------- 10 files changed, 94 insertions(+), 108 deletions(-) create mode 100644 java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java rename java-sdk/example/src/java/org/apache/airflow/example/{JavaExample.java => JavaExampleBuilder.java} (77%) delete mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt diff --git a/java-sdk/adr/0001-java-sdk-airflow-integration.md b/java-sdk/adr/0001-java-sdk-airflow-integration.md index 1609e99a413e2..a3f8703df1e27 100644 --- a/java-sdk/adr/0001-java-sdk-airflow-integration.md +++ b/java-sdk/adr/0001-java-sdk-airflow-integration.md @@ -47,10 +47,10 @@ There is one way to write a non-Python task: implement the language SDK's task i We provide two approaches for integrating non-Python tasks into a DAG: **a) Pure Java DAG** — define the entire DAG in Java, with no Python file at all. 
-The Java SDK provides `DagBundle`, `Dag`, and `Task` interfaces: +The Java SDK provides `BundleBuilder`, `Dag`, and `Task` interfaces: ```java -public class JavaExample implements DagBundle { +public class JavaExampleBuilder { public static class Extract implements Task { public void execute(Client client) throws Exception { @@ -67,11 +67,11 @@ public class JavaExample implements DagBundle { } @Override - public List getDags() { + public Dag build() { var dag = new Dag("java_example", null, "@daily"); dag.addTask("extract", Extract.class, List.of()); dag.addTask("transform", Transform.class, List.of("extract")); - return List.of(dag); + return dag; } } ``` @@ -101,7 +101,7 @@ def simple_dag(): Both approaches are supported in parallel. A pure Java DAG needs no Python at all for authoring. A `@task.stub` DAG requires a Python file but lets you mix Python operators and non-Python tasks in a single pipeline. -> **Note:** The current `DagBundle` interface used in pure Java DAGs is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. +> **Note:** The current `BundleBuilder` interface used in pure Java DAGs is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. ### The Coordinator Layer @@ -199,7 +199,7 @@ class BaseCoordinator: @classmethod def get_code_from_file(cls, fileloc: str) -> str: - """Return the actual DAG code (the content of JavaExample.java in this case""" + """Return the actual DAG code (the content of JavaExampleBuilder.java in this case)""" ... 
# DAG Parsing (called in forked DagFileProcessor child process) diff --git a/java-sdk/adr/0002-dag-parsing.md b/java-sdk/adr/0002-dag-parsing.md index 235658d5460bd..f5ea2c6651b5e 100644 --- a/java-sdk/adr/0002-dag-parsing.md +++ b/java-sdk/adr/0002-dag-parsing.md @@ -283,7 +283,7 @@ DagParser(request.file).parse(bundle) │ ├── Returns DagParsingResult(fileloc=file, dags=bundle.dags) │ The DAGs were already loaded into the Bundle at startup - │ via DagBundle.getDags() + │ via BundleBuilder.getDags() │ ▼ sendMessage(frame.id, result) @@ -308,12 +308,12 @@ sendMessage(frame.id, result) shutDownRequested = true ← one-shot, process will exit ``` -**Java SDK DagBundle Interface:** +**Java SDK BundleBuilder Interface:** -Bundle authors implement `DagBundle` to define their DAGs: +Bundle authors implement `BundleBuilder` to define their DAGs: ```java -public class JavaExample implements DagBundle { +public class ExampleBundleBuilder implements BundleBuilder { @Override public List getDags() { var dag = new Dag("java_example", null, "@daily"); @@ -324,11 +324,7 @@ public class JavaExample implements DagBundle { } public static void main(String[] args) { - var example = new JavaExample(); - var bundle = new Bundle( - JavaExample.class.getPackage().getImplementationVersion(), - example.getDags() - ); + var bundle = new ExampleBundleBuilder().build(); Server.create(args).serve(bundle); } } diff --git a/java-sdk/adr/0003-workload-execution.md b/java-sdk/adr/0003-workload-execution.md index afa90a16e9e71..5f518705622fd 100644 --- a/java-sdk/adr/0003-workload-execution.md +++ b/java-sdk/adr/0003-workload-execution.md @@ -418,21 +418,15 @@ public static class Load implements Task { **Java SDK Complete Bundle Entry Point:** ```java -public class JavaExample implements DagBundle { +public class ExampleBundleBuilder implements BundleBuilder { @Override public List getDags() { - var dag = new Dag("java_example", null, "@daily"); - dag.addTask("extract", Extract.class, List.of()); - 
dag.addTask("transform", Transform.class, List.of("extract")); - dag.addTask("load", Load.class, List.of("transform")); + var dag = JavaExampleBuilder.build(); return List.of(dag); } public static void main(String[] args) { - var bundle = new Bundle( - JavaExample.class.getPackage().getImplementationVersion(), - new JavaExample().getDags() - ); + var bundle = new ExampleBundleBuilder().build(); Server.create(args).serve(bundle); // parses --comm/--logs, connects, enters message loop } } diff --git a/java-sdk/adr/0004-pure-java-dags.md b/java-sdk/adr/0004-pure-java-dags.md index a3c42672c24cf..9ca153be2d7c4 100644 --- a/java-sdk/adr/0004-pure-java-dags.md +++ b/java-sdk/adr/0004-pure-java-dags.md @@ -25,7 +25,7 @@ Accepted ## Context -[ADR-0001](0001-java-sdk-airflow-integration.md) introduces two ways to integrate non-Python tasks: `@task.stub` (mixed Python+Java DAGs) and pure Java DAGs (entire DAG in Java via `DagBundle`). [ADR-0002](0002-dag-parsing.md) and [ADR-0003](0003-workload-execution.md) describe the coordinator infrastructure for DAG parsing and task execution respectively. +[ADR-0001](0001-java-sdk-airflow-integration.md) introduces two ways to integrate non-Python tasks: `@task.stub` (mixed Python+Java DAGs) and pure Java DAGs (entire DAG in Java via `BundleBuilder`). [ADR-0002](0002-dag-parsing.md) and [ADR-0003](0003-workload-execution.md) describe the coordinator infrastructure for DAG parsing and task execution respectively. This ADR focuses on the Java-SDK-specific concerns that make pure Java DAGs work end-to-end — build-time metadata generation, source code packaging for UI visibility, and JAR manifest conventions — rather than the shared coordinator infrastructure already covered in those ADRs. 
@@ -37,17 +37,17 @@ The central challenge is that Airflow Core expects to read DAG metadata and sour The JAR manifest (`META-INF/MANIFEST.MF`) carries three SDK-specific attributes that Airflow and the Java SDK use to bootstrap a bundle: -| Attribute | Example Value | Purpose | -|---|---|---| -| `Main-Class` | `org.apache.airflow.example.JavaExample` | Standard Java attribute; the coordinator uses it to launch the JVM | -| `Airflow-Java-SDK-Metadata` | `airflow-metadata.yaml` | Points to the embedded metadata file (dag IDs, task IDs) | -| `Airflow-Java-SDK-Dag-Code` | `JavaExample.java` | Points to the embedded source file for Airflow UI display | +| Attribute | Example Value | Purpose | +|---|---------------------------------------------------|---| +| `Main-Class` | `org.apache.airflow.example.ExampleBundleBuilder` | Standard Java attribute; the coordinator uses it to launch the JVM | +| `Airflow-Java-SDK-Metadata` | `airflow-metadata.yaml` | Points to the embedded metadata file (dag IDs, task IDs) | +| `Airflow-Java-SDK-Dag-Code` | `JavaExampleBuilder.java` | Points to the embedded source file for Airflow UI display | These attributes are set in the Gradle build (see [Build-Time Packaging](#build-time-packaging-gradle) below). The Python-side coordinator reads `Main-Class` to construct the launch command; `BundleScanner` reads `Airflow-Java-SDK-Metadata` to discover DAG IDs without launching the JVM. 
### Build-Time Metadata: `airflow-metadata.yaml` -At build time, the SDK runs `BundleInspector` — a build-time utility that reflectively instantiates the user's `DagBundle` class, calls `getDags()`, and writes a YAML file listing every DAG ID and its task IDs: +At build time, the SDK runs `BundleInspector` — a build-time utility that reflectively instantiates the user's `BundleBuilder` class, calls `getDags()`, and writes a YAML file listing every DAG ID and its task IDs: ```yaml dags: @@ -71,8 +71,8 @@ object BundleInspector { val className = args[0] val outputPath = args[1] val clazz = Class.forName(className) - val instance = clazz.getDeclaredConstructor().newInstance() as? DagBundle - ?: error("$className does not implement DagBundle") + val instance = clazz.getDeclaredConstructor().newInstance() as? BundleBuilder + ?: error("$className does not implement BundleBuilder") val dags = instance.getDags() File(outputPath).apply { parentFile.mkdirs() }.writeText(toYaml(dags)) } @@ -145,12 +145,12 @@ The resulting JAR contains: example.jar ├── META-INF/MANIFEST.MF (Main-Class, SDK attributes) ├── airflow-metadata.yaml (dag IDs + task IDs) -├── JavaExample.java (raw source for UI display) +├── JavaExampleBuilder.java (raw source for UI display) ├── org/apache/airflow/example/ -│ ├── JavaExample.class (compiled bundle entry point) -│ ├── JavaExample$Extract.class -│ ├── JavaExample$Transform.class -│ └── JavaExample$Load.class +│ ├── JavaExampleBuilder.class (compiled bundle entry point) +│ ├── JavaExampleBuilder$Extract.class +│ ├── JavaExampleBuilder$Transform.class +│ └── JavaExampleBuilder$Load.class └── ... (SDK + dependency classes) ``` @@ -174,12 +174,12 @@ It supports two directory layouts: For each JAR, it reads the `Airflow-Java-SDK-Metadata` manifest attribute, extracts the referenced YAML, parses DAG IDs, and returns a mapping from `dag_id` to `ResolvedBundle`. 
-### The DagBundle Authoring API +### The BundleBuilder Authoring API -Bundle authors implement `DagBundle` to define their DAGs: +Bundle authors implement builder classes to define their DAGs: ```java -public class JavaExample implements DagBundle { +public class JavaExampleBuilder { public static class Extract implements Task { public void execute(Client client) throws Exception { @@ -195,28 +195,33 @@ public class JavaExample implements DagBundle { } } - @Override - public List getDags() { + public static Dag build() { var dag = new Dag("java_example", null, "@daily"); dag.addTask("extract", Extract.class, List.of()); dag.addTask("transform", Transform.class, List.of("extract")); - return List.of(dag); + return dag; } +} +``` - public static void main(String[] args) { - var example = new JavaExample(); - var bundle = new Bundle( - JavaExample.class.getPackage().getImplementationVersion(), - example.getDags() - ); - Server.create(args).serve(bundle); - } +and then collect DAGs with a BundleBuilder: + +```java +public class ExampleBundleBuilder implements BundleBuilder { + public Iterable getDags() { + return List.of(JavaExampleBuilder.build()); + } + + public static void main(String[] args) { + var bundle = new ExampleBundleBuilder().build(); + Server.create(args).serve(bundle); + } } ``` -The `main()` method is the JVM entry point that the coordinator launches. It wires the `DagBundle` to the SDK's TCP communication layer (`Server` → `CoordinatorComm`), which handles DAG parsing requests and task execution commands as described in [ADR-0002](0002-dag-parsing.md) and [ADR-0003](0003-workload-execution.md). +The `main()` method is the JVM entry point that the coordinator launches. It wires the `BundleBuilder` to the SDK's TCP communication layer (`Server` → `CoordinatorComm`), which handles DAG parsing requests and task execution commands as described in [ADR-0002](0002-dag-parsing.md) and [ADR-0003](0003-workload-execution.md). 
-> **Note:** The current `DagBundle` interface is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. +> **Note:** The current `BundleBuilder` interface is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. ## Consequences @@ -224,5 +229,5 @@ The `main()` method is the JVM entry point that the coordinator launches. It wir - Build-time metadata generation means DAG IDs can be discovered without JVM startup — important for `BundleScanner` and tooling. - Source code packaging enables Airflow UI display with no changes to Airflow Core's `DagCode` infrastructure. - The manifest convention (`Airflow-Java-SDK-*` attributes) is extensible — future attributes can carry additional metadata without breaking existing tooling. -- The build-time `BundleInspector` step adds a compile-time dependency on the SDK and requires the `DagBundle` class to be instantiable without side effects (no I/O, no connections in the constructor). +- The build-time `BundleInspector` step adds a compile-time dependency on the SDK and requires the `BundleBuilder` class to be instantiable without side effects (no I/O, no connections in the constructor). - Bundle authors must follow the Gradle packaging pattern (or replicate it in Maven/other build tools) — this is SDK-specific boilerplate that doesn't exist for Python DAGs. 
diff --git a/java-sdk/example/build.gradle.kts b/java-sdk/example/build.gradle.kts index a0bb2d4f8c6b8..674bde14cd0da 100644 --- a/java-sdk/example/build.gradle.kts +++ b/java-sdk/example/build.gradle.kts @@ -33,7 +33,7 @@ sourceSets { } application { - mainClass = "org.apache.airflow.example.JavaExample" + mainClass = "org.apache.airflow.example.ExampleBundleBuilder" } val bundleMainClass = application.mainClass.get() diff --git a/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java b/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java new file mode 100644 index 0000000000000..745d6c1696359 --- /dev/null +++ b/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java @@ -0,0 +1,18 @@ +package org.apache.airflow.example; + +import org.apache.airflow.sdk.*; +import org.jetbrains.annotations.NotNull; +import java.util.List; + +public class ExampleBundleBuilder implements BundleBuilder { + @NotNull + @Override + public Iterable getDags() { + return List.of(JavaExampleBuilder.build()); + } + + public static void main(String[] args) { + var bundle = new ExampleBundleBuilder().build(); + Server.create(args).serve(bundle); + } +} diff --git a/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java b/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java similarity index 77% rename from java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java rename to java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java index d6a82c799ef62..21c052110c571 100644 --- a/java-sdk/example/src/java/org/apache/airflow/example/JavaExample.java +++ b/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java @@ -25,8 +25,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class JavaExample implements DagBundle { - private static final Logger logger = LoggerFactory.getLogger(JavaExample.class); +public class 
JavaExampleBuilder { + private static final Logger logger = LoggerFactory.getLogger(JavaExampleBuilder.class); public static class Extract implements Task { public void execute(Client client) throws Exception { @@ -69,20 +69,11 @@ public void execute(Client client) { } } - @Override - public List getDags() { - var javaExample = new Dag("java_example", /* description= */ null, /* schedule= */ "@daily"); - javaExample.addTask("extract", Extract.class, List.of()); - javaExample.addTask("transform", Transform.class, List.of("extract")); - javaExample.addTask("load", Load.class, List.of("transform")); - return List.of(javaExample); - } - - public static void main(String[] args) { - var example = new JavaExample(); - var bundle = - new Bundle(JavaExample.class.getPackage().getImplementationVersion(), example.getDags()); - - Server.create(args).serve(bundle); + public static Dag build() { + var dag = new Dag("java_example", /* description= */ null, /* schedule= */ "@daily"); + dag.addTask("extract", Extract.class, List.of()); + dag.addTask("transform", Transform.class, List.of("extract")); + dag.addTask("load", Load.class, List.of("transform")); + return dag; } } diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt index 514ffc788b2e6..22ff48cf02dea 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt @@ -35,3 +35,16 @@ private fun Iterable.associateByDagId(): Map { } return dagMap } + +/** + * Interface for declaring DAGs in a bundle. + * + *

Implement this interface in the class specified as {@code Main-Class} in your JAR manifest. + * The build system instantiates this class at compile time to extract dag_ids and task_ids + * into the JAR manifest, enabling inspection of bundled DAGs without running the full process. + */ +interface BundleBuilder { + fun getDags(): Iterable + + fun build(): Bundle = Bundle(this::class.java.`package`.implementationVersion ?: "0", getDags()) +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt index 186daa5b8fabc..e69c98f916adc 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt @@ -22,7 +22,7 @@ package org.apache.airflow.sdk import java.io.File /** - * Build-time utility that inspects a [DagBundle] implementation and writes + * Build-time utility that inspects a [BundleBuilder] implementation and writes * dag_ids and task_ids to a YAML metadata file for inclusion in the JAR. * * Usage: {@code java -cp org.apache.airflow.sdk.BundleInspector } @@ -36,8 +36,8 @@ object BundleInspector { val clazz = Class.forName(className) val instance = - clazz.getDeclaredConstructor().newInstance() as? DagBundle - ?: error("$className does not implement ${DagBundle::class.qualifiedName}") + clazz.getDeclaredConstructor().newInstance() as? 
BundleBuilder + ?: error("$className does not implement ${BundleBuilder::class.qualifiedName}") val dags = instance.getDags() val outputFile = File(outputPath) @@ -45,7 +45,7 @@ object BundleInspector { outputFile.writeText(toYaml(dags)) } - internal fun toYaml(dags: List): String = + private fun toYaml(dags: Iterable): String = buildString { appendLine("dags:") for (dag in dags) { diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt deleted file mode 100644 index 0b266b83c7571..0000000000000 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/DagBundle.kt +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.airflow.sdk - -/** - * Interface for declaring DAGs in a bundle. - * - *

Implement this interface in the class specified as {@code Main-Class} in your JAR manifest. - * The build system instantiates this class at compile time to extract dag_ids and task_ids - * into the JAR manifest, enabling inspection of bundled DAGs without running the full process. - */ -interface DagBundle { - fun getDags(): List -} From 7ac6547e0e8242b31e3d7021e653a1ac02559ce0 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Wed, 6 May 2026 10:20:57 +0800 Subject: [PATCH 09/16] Apply ktlintFormat changes (#1574) I ran ./gradlew ktlintFormat through to code base and it changed some files. These are probably missed since we have not migrated the pre-commit hooks to the Airflow repo and the unformatted code isn't warned in the Java SDK PR. --- .../airflow/sdk/execution/Supervisor.kt | 26 +++++++++---------- .../airflow/sdk/execution/SupervisorTest.kt | 20 +++++++------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt index 393e37c8cf005..cad988eab59d2 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Supervisor.kt @@ -26,10 +26,13 @@ import kotlinx.coroutines.launch import kotlinx.coroutines.withContext import org.apache.airflow.sdk.ensureTrailingSlash import org.apache.airflow.sdk.execution.api.client.ApiClient +import org.apache.airflow.sdk.execution.api.model.BundleInfo import org.apache.airflow.sdk.execution.api.model.TIEnterRunningPayload import org.apache.airflow.sdk.execution.api.model.TIRunContext import org.apache.airflow.sdk.execution.api.model.TISuccessStatePayload import org.apache.airflow.sdk.execution.api.model.TITerminalStatePayload +import org.apache.airflow.sdk.execution.api.model.TaskInstance +import org.apache.airflow.sdk.execution.api.model.TaskInstanceState import 
org.apache.airflow.sdk.execution.api.model.TerminalStateNonSuccess import org.apache.airflow.sdk.execution.api.route.TaskInstancesApi import org.apache.airflow.sdk.execution.api.route.XComsApi @@ -42,9 +45,6 @@ import java.net.Socket import java.time.LocalDate import java.time.OffsetDateTime import java.util.UUID -import org.apache.airflow.sdk.execution.api.model.BundleInfo as ExecutionBundleInfo -import org.apache.airflow.sdk.execution.api.model.TaskInstance as ExecutionTaskInstance -import org.apache.airflow.sdk.execution.api.model.TaskInstanceState as ExecutionTaskInstanceState data class SupervisorTaskInstance( val id: UUID, @@ -77,7 +77,7 @@ data class SupervisorRequest( ) data class SupervisorResult( - val finalState: ExecutionTaskInstanceState, + val finalState: TaskInstanceState, val exitCode: Int, ) @@ -165,7 +165,7 @@ object Supervisor { stderrPump.join() SupervisorResult( - finalState = if (exitCode == 0) finalState else ExecutionTaskInstanceState.FAILED, + finalState = if (exitCode == 0) finalState else TaskInstanceState.FAILED, exitCode = exitCode, ) } @@ -198,13 +198,13 @@ object Supervisor { execApi: ApiClient, execClient: HttpExecApiClient, taskInstanceId: UUID, - ): ExecutionTaskInstanceState { + ): TaskInstanceState { val input = comm.getInputStream() val output = comm.getOutputStream() while (true) { val frame = TaskSdkFrames.readFrame(input, TaskSdkFrames.toSupervisorTypes) - when (val message = frame.body ?: return ExecutionTaskInstanceState.FAILED) { + when (val message = frame.body ?: return TaskInstanceState.FAILED) { is GetConnection -> reply(frame.id, output) { execClient.getConnection(message.id) @@ -231,11 +231,11 @@ object Supervisor { } is SucceedTask -> { succeed(execApi, taskInstanceId, message) - return ExecutionTaskInstanceState.SUCCESS + return TaskInstanceState.SUCCESS } is TaskState -> { finish(execApi, taskInstanceId, message) - return ExecutionTaskInstanceState.fromValue(message.state) + return 
TaskInstanceState.fromValue(message.state) } is ErrorResponse -> throw IllegalStateException("[${message.error}] ${message.detail}") else -> throw IllegalStateException("Unsupported Task SDK message type ${message::class.java.name}") @@ -355,7 +355,7 @@ object Supervisor { private fun startTask( api: ApiClient, - taskInstance: ExecutionTaskInstance, + taskInstance: TaskInstance, startDate: OffsetDateTime, process: Process, workerName: String, @@ -374,7 +374,7 @@ object Supervisor { } private fun SupervisorTaskInstance.toExecutionTaskInstance(workerName: String) = - ExecutionTaskInstance().also { + TaskInstance().also { it.id = id it.taskId = taskId it.dagId = dagId @@ -387,14 +387,14 @@ object Supervisor { } private fun SupervisorRequest.toStartupDetails( - taskInstance: ExecutionTaskInstance, + taskInstance: TaskInstance, tiContext: TIRunContext, startDate: OffsetDateTime, ) = StartupDetails().also { it.ti = taskInstance it.dagRelPath = dagRelPath it.bundleInfo = - ExecutionBundleInfo().also { info -> + BundleInfo().also { info -> info.name = bundleInfo.name info.version = bundleInfo.version } diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt index c8b69da21c527..6d0551b0e0b83 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SupervisorTest.kt @@ -23,7 +23,7 @@ import okhttp3.mockwebserver.Dispatcher import okhttp3.mockwebserver.MockResponse import okhttp3.mockwebserver.MockWebServer import okhttp3.mockwebserver.RecordedRequest -import org.apache.airflow.sdk.execution.api.model.TaskInstanceState as ExecutionTaskInstanceState +import org.apache.airflow.sdk.execution.api.model.TaskInstanceState import org.junit.jupiter.api.AfterEach import org.junit.jupiter.api.Assertions.assertEquals import 
org.junit.jupiter.api.Assertions.assertNotNull @@ -169,12 +169,12 @@ class SupervisorTest { @Test @DisplayName("SupervisorResult: success and failure states") fun supervisorResult() { - val success = SupervisorResult(ExecutionTaskInstanceState.SUCCESS, 0) - assertEquals(ExecutionTaskInstanceState.SUCCESS, success.finalState) + val success = SupervisorResult(TaskInstanceState.SUCCESS, 0) + assertEquals(TaskInstanceState.SUCCESS, success.finalState) assertEquals(0, success.exitCode) - val failure = SupervisorResult(ExecutionTaskInstanceState.FAILED, 1) - assertEquals(ExecutionTaskInstanceState.FAILED, failure.finalState) + val failure = SupervisorResult(TaskInstanceState.FAILED, 1) + assertEquals(TaskInstanceState.FAILED, failure.finalState) assertEquals(1, failure.exitCode) } @@ -320,7 +320,7 @@ class SupervisorTest { val result = Supervisor.run(request()) - assertEquals(ExecutionTaskInstanceState.SUCCESS, result.finalState) + assertEquals(TaskInstanceState.SUCCESS, result.finalState) assertEquals(0, result.exitCode) } @@ -333,7 +333,7 @@ class SupervisorTest { val result = Supervisor.run(request(mainClass = TestFailSubprocess::class.java.name)) - assertEquals(ExecutionTaskInstanceState.FAILED, result.finalState) + assertEquals(TaskInstanceState.FAILED, result.finalState) assertEquals(0, result.exitCode) // process exits cleanly, but reports failed state } @@ -346,7 +346,7 @@ class SupervisorTest { val result = Supervisor.run(request(mainClass = TestGetVariableSubprocess::class.java.name)) - assertEquals(ExecutionTaskInstanceState.SUCCESS, result.finalState) + assertEquals(TaskInstanceState.SUCCESS, result.finalState) assertEquals(0, result.exitCode) // Verify the variable request was made to the mock server. 
@@ -363,7 +363,7 @@ class SupervisorTest { val result = Supervisor.run(request(mainClass = TestGetConnectionSubprocess::class.java.name)) - assertEquals(ExecutionTaskInstanceState.SUCCESS, result.finalState) + assertEquals(TaskInstanceState.SUCCESS, result.finalState) assertEquals(0, result.exitCode) val requests = (1..mockServer.requestCount).map { mockServer.takeRequest() } @@ -449,7 +449,7 @@ class SupervisorTest { // but then exits with code 42. Supervisor should override the final state to FAILED. val result = Supervisor.run(request(mainClass = TestSucceedThenCrashSubprocess::class.java.name)) - assertEquals(ExecutionTaskInstanceState.FAILED, result.finalState) + assertEquals(TaskInstanceState.FAILED, result.finalState) assertEquals(42, result.exitCode) } } From ee623c09eb06bd003486ac9dbcf0f9cfe5bb6b51 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Wed, 6 May 2026 11:08:31 +0800 Subject: [PATCH 10/16] Remove dag parameters for now (#1573) Since we're not (explicitly) supporting pure Java Dags for the moment, these params are not useful. Removing them makes it easier to implement the annotation-based authoring interface. We will revert this commit when we come back to finish pure Java Dag support. 
--- .../airflow/example/JavaExampleBuilder.java | 2 +- .../kotlin/org/apache/airflow/sdk/Bundle.kt | 4 +- .../org/apache/airflow/sdk/BundleInspector.kt | 6 +- .../main/kotlin/org/apache/airflow/sdk/Dag.kt | 94 +++--------- .../org/apache/airflow/sdk/execution/Serde.kt | 134 ++++-------------- .../org/apache/airflow/sdk/BundleTest.kt | 2 +- .../SerializationCompatibilityTest.kt | 44 +----- 7 files changed, 55 insertions(+), 231 deletions(-) diff --git a/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java b/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java index 21c052110c571..e6ec386312088 100644 --- a/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java +++ b/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java @@ -70,7 +70,7 @@ public void execute(Client client) { } public static Dag build() { - var dag = new Dag("java_example", /* description= */ null, /* schedule= */ "@daily"); + var dag = new Dag("java_example"); dag.addTask("extract", Extract.class, List.of()); dag.addTask("transform", Transform.class, List.of("extract")); dag.addTask("load", Load.class, List.of("transform")); diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt index 22ff48cf02dea..edf962fd14578 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Bundle.kt @@ -29,8 +29,8 @@ class Bundle( private fun Iterable.associateByDagId(): Map { val dagMap = linkedMapOf() for (dag in this) { - require(dagMap.putIfAbsent(dag.dagId, dag) == null) { - "Duplicate dagId in bundle: ${dag.dagId}" + require(dagMap.putIfAbsent(dag.id, dag) == null) { + "Dags in bundle have duplicate ID: ${dag.id}" } } return dagMap diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt 
b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt index e69c98f916adc..ae180c0d91e91 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/BundleInspector.kt @@ -49,10 +49,10 @@ object BundleInspector { buildString { appendLine("dags:") for (dag in dags) { - appendLine(" ${dag.dagId}:") + appendLine(" ${dag.id}:") appendLine(" tasks:") - for (taskId in dag.tasks.keys) { - appendLine(" - $taskId") + for (id in dag.tasks.keys) { + appendLine(" - $id") } } } diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt index c451e0c7ebcc0..17fd498168791 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt @@ -19,87 +19,29 @@ package org.apache.airflow.sdk -import java.time.Duration -import java.time.Instant - /** - * A Dag (Directed Acyclic Graph) is a collection of tasks with directional dependencies. - * - * A Dag has a schedule, a start date and an end date (optional). For each schedule - * (say daily or hourly), the Dag needs to run each individual task as their - * dependencies are met. + * Collection of tasks with directional dependencies. * - * @param dagId The id of the Dag; must consist exclusively of alphanumeric characters, + * @param id The Dag's id. This must consist exclusively of alphanumeric characters, * dashes, dots and underscores (all ASCII). - * @param description The description for the Dag to e.g. be shown on the webserver. - * @param schedule Defines when Dag runs are scheduled. Can be a cron expression string - * (e.g. "0 0 * * *"), a preset (e.g. "@daily", "@hourly", "@once", "@continuous"), - * or null for no schedule. - * @param startDate The timestamp from which the scheduler will attempt to backfill. 
- * @param endDate A date beyond which your Dag won't run; leave null for open-ended scheduling. - * @param defaultArgs A map of default parameters to be used as constructor keyword - * parameters when initialising operators. - * @param maxActiveTasks The number of task instances allowed to run concurrently per Dag run. - * @param maxActiveRuns Maximum number of active Dag runs. - * @param maxConsecutiveFailedDagRuns Maximum number of consecutive failed Dag runs, - * beyond this the scheduler will disable the Dag. - * @param dagrunTimeout Duration a DagRun is allowed to run before it times out or fails. - * @param catchup Perform scheduler catchup (or only run latest)? Defaults to false. - * @param docMd Documentation in markdown format. - * @param accessControl Optional Dag-level access control actions, e.g. - * mapOf("role1" to mapOf("DAGs" to setOf("can_read", "can_edit"))). - * @param isPausedUponCreation Whether the Dag is paused when created for the first time. - * @param tags Set of tags to help filtering Dags in the UI. - * @param ownerLinks Map of owners and their links, clickable on the Dags view UI. - * @param failFast Fails currently running tasks when a task in Dag fails. - * @param dagDisplayName The display name of the Dag on the UI. Defaults to dagId. - * @param renderTemplateAsNativeObj If true, uses native rendering for templates. - * @param params A map of Dag-level parameters accessible in templates, namespaced under - * "params". These can be overridden at the task level. */ -class Dag +class Dag( + // TODO: charset check? + val id: String, +) { + internal var tasks = mutableMapOf>() + internal var dependants = mutableMapOf>() + @JvmOverloads - constructor( - val dagId: String, - val description: String? = null, - val schedule: String? = null, - val startDate: Instant? = null, - val endDate: Instant? 
= null, - val defaultArgs: Map = emptyMap(), - val maxActiveTasks: Int = DEFAULT_MAX_ACTIVE_TASKS, - val maxActiveRuns: Int = DEFAULT_MAX_ACTIVE_RUNS, - val maxConsecutiveFailedDagRuns: Int = DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS, - val dagrunTimeout: Duration? = null, - val catchup: Boolean = false, - val docMd: String? = null, - val accessControl: Map>>? = null, - val isPausedUponCreation: Boolean? = null, - val tags: Set = emptySet(), - val ownerLinks: Map = emptyMap(), - val failFast: Boolean = false, - val dagDisplayName: String? = null, - val renderTemplateAsNativeObj: Boolean = false, - val params: Map? = null, + fun addTask( + id: String, + definition: Class, + dependsOn: List = emptyList(), ) { - internal var tasks = mutableMapOf>() - internal var dependants = mutableMapOf>() - - @JvmOverloads - fun addTask( - id: String, - definition: Class, - dependsOn: List = emptyList(), - ) { - // TODO: Check duplicate key. - tasks[id] = definition - for (parent in dependsOn) { - dependants.getOrPut(parent) { mutableSetOf() }.add(id) - } - } - - companion object { - const val DEFAULT_MAX_ACTIVE_TASKS = 16 - const val DEFAULT_MAX_ACTIVE_RUNS = 16 - const val DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS = 0 + // TODO: Check duplicate key. + tasks[id] = definition + for (parent in dependsOn) { + dependants.getOrPut(parent) { mutableSetOf() }.add(id) } } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt index b62150bbdb40d..9962da20fc9af 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/Serde.kt @@ -45,10 +45,10 @@ private val logger = Logger(SerdeScope::class) * * Primitives pass through; complex types are wrapped in {"__type": ..., "__var": ...}. 
* This matches the Python BaseSerialization.serialize() output exactly: - * - dict -> {"__type": "dict", "__var": {k: serialize(v), ...}} - * - set -> {"__type": "set", "__var": [sorted items]} - * - datetime -> {"__type": "datetime", "__var": epoch_seconds} - * - timedelta -> {"__type": "timedelta", "__var": total_seconds} + * - Map -> {"__type": "dict", "__var": {k: serialize(v), ...}} + * - Set -> {"__type": "set", "__var": [sorted items]} + * - Datetime -> {"__type": "datetime", "__var": epoch_seconds} + * - Timedelta -> {"__type": "timedelta", "__var": total_seconds} */ internal fun serializeValue(value: Any?): Any? = when (value) { @@ -85,54 +85,15 @@ internal fun serializeValue(value: Any?): Any? = else -> value.toString() } -/** - * Unwrap a single level of type encoding, extracting the "__var" part. - * - * In Python's serialize_to_json, non-decorated fields are serialized then unwrapped: - * value = cls.serialize(value) - * if isinstance(value, dict) and Encoding.TYPE in value: - * value = value[Encoding.VAR] - */ -private fun unwrapTypeEncoding(value: Any?): Any? 
= - if (value is Map<*, *> && "__type" in value && "__var" in value) { - value["__var"] - } else { - value - } - // --------------------------------------------------------------------------- // Timetable serialization // --------------------------------------------------------------------------- -private fun serializeTimetable(schedule: String?): Serialized = - when (schedule) { - null -> - mapOf( - "__type" to "airflow.timetables.simple.NullTimetable", - "__var" to emptyMap(), - ) - "@once" -> - mapOf( - "__type" to "airflow.timetables.simple.OnceTimetable", - "__var" to emptyMap(), - ) - "@continuous" -> - mapOf( - "__type" to "airflow.timetables.simple.ContinuousTimetable", - "__var" to emptyMap(), - ) - else -> - mapOf( - "__type" to "airflow.timetables.trigger.CronTriggerTimetable", - "__var" to - mapOf( - "expression" to schedule, - "timezone" to "UTC", - "interval" to 0.0, - "run_immediately" to false, - ), - ) - } +private fun serializeTimetable(): Serialized = + mapOf( + "__type" to "airflow.timetables.simple.NullTimetable", + "__var" to emptyMap(), + ) // --------------------------------------------------------------------------- // Task serialization @@ -200,67 +161,26 @@ private fun Dag.serialize( id: String, fileloc: String, relativeFileloc: String, -): Serialized { - val result = - mutableMapOf( - // Required fields (always present) - "dag_id" to id, - "fileloc" to fileloc, - "relative_fileloc" to relativeFileloc, - // Always serialized - "timezone" to "UTC", - "timetable" to serializeTimetable(schedule), - "tasks" to tasks.entries.map { (taskId, task) -> task.serialize(taskId, dependants[taskId]) }, - "dag_dependencies" to emptyList(), - "task_group" to serializeTaskGroup(tasks.keys), - "edge_info" to emptyMap(), - "params" to (params?.let { serializeParams(it) } ?: emptyList()), - "deadline" to null, - "allowed_run_types" to null, - ) - - // Optional fields — only include if non-null. 
- // Non-decorated fields are serialized then unwrapped (matching Python's serialize_to_json). - description?.let { result["description"] = it } - startDate?.let { result["start_date"] = unwrapTypeEncoding(serializeValue(it)) } - endDate?.let { result["end_date"] = unwrapTypeEncoding(serializeValue(it)) } - dagrunTimeout?.let { result["dagrun_timeout"] = unwrapTypeEncoding(serializeValue(it)) } - docMd?.let { result["doc_md"] = it } - isPausedUponCreation?.let { result["is_paused_upon_creation"] = it } - - // Decorated fields (kept with __type/__var encoding, NOT unwrapped) - if (defaultArgs.isNotEmpty()) { - result["default_args"] = serializeValue(defaultArgs) - } - if (accessControl != null) { - // access_control is always serialized when not null, even if empty - result["access_control"] = serializeValue(accessControl) - } - - // Fields excluded when matching schema defaults - if (catchup) result["catchup"] = true - if (failFast) result["fail_fast"] = true - if (maxActiveTasks != Dag.DEFAULT_MAX_ACTIVE_TASKS) result["max_active_tasks"] = maxActiveTasks - if (maxActiveRuns != Dag.DEFAULT_MAX_ACTIVE_RUNS) result["max_active_runs"] = maxActiveRuns - if (maxConsecutiveFailedDagRuns != Dag.DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS) { - result["max_consecutive_failed_dag_runs"] = maxConsecutiveFailedDagRuns - } - if (renderTemplateAsNativeObj) result["render_template_as_native_obj"] = true - - // dag_display_name — excluded when it equals dag_id (the default) - if (dagDisplayName != null && dagDisplayName != id) { - result["dag_display_name"] = dagDisplayName - } - - // Collection fields — serialized then unwrapped; excluded when empty - if (tags.isNotEmpty()) result["tags"] = unwrapTypeEncoding(serializeValue(tags)) - if (ownerLinks.isNotEmpty()) result["owner_links"] = unwrapTypeEncoding(serializeValue(ownerLinks)) - - return result -} +): Serialized = + mutableMapOf( + // Required fields (always present) + "dag_id" to id, + "fileloc" to fileloc, + "relative_fileloc" 
to relativeFileloc, + // Always serialized + "timezone" to "UTC", + "timetable" to serializeTimetable(), + "tasks" to tasks.entries.map { (taskId, task) -> task.serialize(taskId, dependants[taskId]) }, + "dag_dependencies" to emptyList(), + "task_group" to serializeTaskGroup(tasks.keys), + "edge_info" to emptyMap(), + "params" to serializeParams(emptyMap()), + "deadline" to null, + "allowed_run_types" to null, + ) /** Serialize a single DAG to a dict. Exposed for cross-language validation testing. */ -internal fun serializeDag(dag: Dag): Serialized = dag.serialize(dag.dagId, "", "") +internal fun serializeDag(dag: Dag): Serialized = dag.serialize(dag.id, "", "") // --------------------------------------------------------------------------- // Top-level envelope — matches Python's DagSerialization.to_dict diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt index 9671972a4fa55..48bae797cbe20 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BundleTest.kt @@ -42,6 +42,6 @@ internal class BundleTest { Bundle("0", listOf(Dag("dag"), Dag("dag"))) } - Assertions.assertEquals("Duplicate dagId in bundle: dag", error.message) + Assertions.assertEquals("Dags in bundle have duplicate ID: dag", error.message) } } diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt index 41edbe3f2aa35..74307445ca6ee 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/SerializationCompatibilityTest.kt @@ -28,15 +28,13 @@ import org.junit.jupiter.api.Assertions.assertNotNull import org.junit.jupiter.api.DynamicTest import 
org.junit.jupiter.api.TestFactory import java.io.File -import java.time.Duration -import java.time.Instant /** - * Reads test_dags.yaml, constructs Dags from the parameters, serialises each + * Reads test_dags.yaml, constructs Dags from the parameters, serializes each * one with the Java SDK, and writes the result to serialized_java.json for * cross-language comparison with the Python output. * - * Each YAML test-case is turned into a JUnit 5 dynamic test so failures are + * Each YAML test-case is turned into a JUnit 5 dynamic test, so failures are * reported individually. * * After running: @@ -72,43 +70,7 @@ class SerializationCompatibilityTest { // ----------------------------------------------------------------------- @Suppress("UNCHECKED_CAST") - private fun constructDag(params: Map): Dag { - val dagId = params["dag_id"] as String - - return Dag( - dagId = dagId, - description = params["description"] as? String, - schedule = params["schedule"] as? String, - startDate = (params["start_date"] as? String)?.let { Instant.parse(it) }, - endDate = (params["end_date"] as? String)?.let { Instant.parse(it) }, - defaultArgs = (params["default_args"] as? Map) ?: emptyMap(), - maxActiveTasks = (params["max_active_tasks"] as? Number)?.toInt() ?: Dag.DEFAULT_MAX_ACTIVE_TASKS, - maxActiveRuns = (params["max_active_runs"] as? Number)?.toInt() ?: Dag.DEFAULT_MAX_ACTIVE_RUNS, - maxConsecutiveFailedDagRuns = - (params["max_consecutive_failed_dag_runs"] as? Number)?.toInt() - ?: Dag.DEFAULT_MAX_CONSECUTIVE_FAILED_DAG_RUNS, - dagrunTimeout = (params["dagrun_timeout_seconds"] as? Number)?.let { Duration.ofSeconds(it.toLong()) }, - catchup = params["catchup"] as? Boolean ?: false, - docMd = params["doc_md"] as? String, - accessControl = - (params["access_control"] as? 
Map>)?.mapValues { (_, resources) -> - resources.mapValues { (_, perms) -> - when (perms) { - is List<*> -> perms.filterIsInstance().toSet() - is Set<*> -> perms.filterIsInstance().toSet() - else -> setOf(perms.toString()) - } - } - }, - isPausedUponCreation = params["is_paused_upon_creation"] as? Boolean, - tags = (params["tags"] as? List<*>)?.filterIsInstance()?.toSet() ?: emptySet(), - ownerLinks = (params["owner_links"] as? Map) ?: emptyMap(), - failFast = params["fail_fast"] as? Boolean ?: false, - dagDisplayName = params["dag_display_name"] as? String, - renderTemplateAsNativeObj = params["render_template_as_native_obj"] as? Boolean ?: false, - params = params["params"] as? Map, - ) - } + private fun constructDag(params: Map): Dag = Dag(params["dag_id"] as String) // ----------------------------------------------------------------------- // Dynamic test generation From 3cd2322bef368f295d0887c813311658e0b85b74 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Wed, 6 May 2026 11:08:53 +0800 Subject: [PATCH 11/16] Add ktlint pre-commit hook (#1575) --- .pre-commit-config.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67a0813959178..756370e075dbc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1100,3 +1100,10 @@ repos: language: python files: .*test.*\.py$ pass_filenames: true + - id: ktlint + name: Run ktlint format + description: "Use ktlint (via Gradle) to format Kotlin and Java files" + entry: ./java-sdk/gradlew -p ./java-sdk ktlintFormat + language: system + pass_filenames: false + files: ^java-sdk/.*$ From 45e1d8103c5d7a4f802add04f954100522eaf430 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Wed, 6 May 2026 13:07:48 +0800 Subject: [PATCH 12/16] Fix license in example --- .../airflow/example/ExampleBundleBuilder.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git 
a/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java b/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java index 745d6c1696359..1368d891e97ea 100644 --- a/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java +++ b/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.airflow.example; import org.apache.airflow.sdk.*; From 584e0b1fdce951834e008969ce2533dd4d4249cc Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Wed, 6 May 2026 14:26:49 +0800 Subject: [PATCH 13/16] Enhance ADR documentation for Java SDK to reflect latest discussion - cut-date: 2026, May 6th - add sections on public API surface, IPC forward-compatibility, and deployment updates - introduce new ADR for coordinator packaging and registration. 
--- .../adr/0001-java-sdk-airflow-integration.md | 41 ++++++++++ java-sdk/adr/0002-dag-parsing.md | 35 ++++++++ java-sdk/adr/0003-workload-execution.md | 44 ++++++++++ java-sdk/adr/0004-pure-java-dags.md | 16 ++++ java-sdk/adr/0005-coordinator-packaging.md | 80 +++++++++++++++++++ 5 files changed, 216 insertions(+) create mode 100644 java-sdk/adr/0005-coordinator-packaging.md diff --git a/java-sdk/adr/0001-java-sdk-airflow-integration.md b/java-sdk/adr/0001-java-sdk-airflow-integration.md index a3f8703df1e27..9dbb3bcb7e84e 100644 --- a/java-sdk/adr/0001-java-sdk-airflow-integration.md +++ b/java-sdk/adr/0001-java-sdk-airflow-integration.md @@ -103,6 +103,33 @@ Both approaches are supported in parallel. A pure Java DAG needs no Python at al > **Note:** The current `BundleBuilder` interface used in pure Java DAGs is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. +### Public API Surface: `Client` and `Context` + +The Java task interface is `void execute(Client client)`. Two design choices warrant explanation. + +**Why `Client`, not `Context`?** The Java SDK exposes two objects, mirroring the Go SDK: + +| Object | Holds | Lifecycle | +|---|---|---| +| `Context` | Static run-time data (`ds`, `ti`, logical date, run-id, etc.) | Populated once from `StartupDetails`, read-only during execution | +| `Client` | Active accessors that perform Execution API calls (connections, variables, XCom) | Each method call is a synchronous request/response over the comm channel | + +In Python, magic objects on the context (e.g., `outlet_events`) can perform Execution API calls transparently because of the language's flexibility. Java is more rigid; making `Context` itself perform background API calls would require significantly more wiring without much user-visible benefit. Splitting the two surfaces makes the API call boundary explicit at the type level. 
+ +**Why is `execute` `void`?** Returning a value from `execute` would imply an automatic XCom push. Java's static type system does not have a clean equivalent of Python's "return any object, get a default-keyed XCom" pattern, and explicit `client.setXCom(...)` calls keep the wire-level behavior obvious. This is a deliberate departure from Python's `@task` semantics, not an oversight. + +### Coordinator Interface: Subprocess-Based by Design + +`BaseCoordinator` exposes both **low-level** hooks (`dag_parsing_cmd`, `task_execution_cmd`) and **high-level** lifecycle methods (`run_dag_parsing`, `run_task_execution`). Subclasses normally implement only the `*_cmd` callbacks; the base class owns the TCP servers, the subprocess spawn, and the I/O bridge. + +This is deliberately tight coupling to a subprocess model. The reasoning: + +- **DAG files written in a programming language have side effects.** Airflow already isolates Python parsing and task execution into child processes; the coordinator interface preserves that invariant for any non-Python language. +- **`*_cmd` is the smallest possible contract for a new language.** A new SDK only needs to translate "you're being asked to parse this file / run this task" into an OS-level launch command. Everything else (TCP plumbing, framing, byte forwarding) is shared. +- **High-level overrides are still available.** A coordinator that wants to bypass the subprocess model entirely (in-process JVM via JNI, REST call to a remote DAG repository, etc.) can override `run_dag_parsing` / `run_task_execution` directly and ignore the `*_cmd` hooks. The two-tier interface is intentional. + +A complementary, **out-of-scope** future direction is parsing static (non-programming-language) DAG sources such as YAML (e.g., `dag-factory`). Those do not need a child process at all — but the decision to launch a child is currently made one layer above the coordinator (`DagFileProcessorManager` → `DagFileProcessorProcess`). 
Hooking in a YAML parser would need a separate extension point at the manager layer; it is not blocked by this design but is also not solved by it. A follow-up AIP is expected to formalize a general "any-source DAG parser" plugin model. + ### The Coordinator Layer We introduce a **Coordinator** layer. When a DAG bundle is loaded, it not only tells Airflow how to find the DAGs (and tasks in them), but also how to *run* each task. Current Python tasks use a Python code path that runs them by forking. A new **Java Coordinator** instructs the task runner how to run tasks in JAR files. @@ -251,6 +278,20 @@ process-coordinators: - airflow.providers.sdk.java.coordinator.JavaCoordinator ``` +> **Open question:** the package name, module path, and registration mechanism for coordinator providers (`apache-airflow-providers-sdk-java` vs `apache-airflow-coordinator-java`, `ProvidersManager` vs a dedicated `CoordinatorManager`) are being tracked separately in [ADR-0005](0005-coordinator-packaging.md). + +### Implementation Language: Kotlin (with a Java-First Public API) + +The user-facing API surface (`Task`, `Client`, `Context`, `Dag`, `DagBundle`) is published as Java types and is the contract bundle authors program against. The SDK *implementation* — `CoordinatorComm`, `Serde`, `TaskSdkFrames`, `Server`, `Supervisor`, `TaskRunner`, `DagParser` — is written in Kotlin. + +Kotlin compiles to the same JVM bytecode as Java and is fully interoperable, so this choice is invisible to bundle authors at runtime. The practical reasons for using Kotlin internally: + +- **Null safety** is part of the type system, removing a large class of latent NPEs in the comm/serde paths. +- **Coroutines and structured I/O** simplify the synchronous-over-async pattern used by `Client.getVariable()` and friends. +- **Less boilerplate** in serialization and frame encoding code, which is the bulk of the SDK.
+ +Because the user-facing API is Java, "Java SDK" remains the accurate name from a DAG-author perspective. A future rename to "JVM SDK" has been floated but is not adopted here; it can be revisited if/when Scala or other JVM-language bindings are proposed. + ### Example: `JavaCoordinator` ```python diff --git a/java-sdk/adr/0002-dag-parsing.md b/java-sdk/adr/0002-dag-parsing.md index f5ea2c6651b5e..b77070add87c1 100644 --- a/java-sdk/adr/0002-dag-parsing.md +++ b/java-sdk/adr/0002-dag-parsing.md @@ -51,6 +51,8 @@ process-coordinators: A single registration covers both DAG parsing and task execution — there are no separate `dag-file-processors` or `task-coordinators` keys. +**Per-host opt-in.** A coordinator becomes available on a given DAG processor host only when its provider is installed there. A deployment can run a Python-only DAG processor pool and a separate Java-capable DAG processor pool by simply *not* installing `apache-airflow-providers-sdk-java` on the Python-only hosts. The same applies to workers ([ADR-0003](0003-workload-execution.md)). There is no requirement that every parser carry a JDK; the coordinator extension point is opt-in by package install. + ### Discovery: `_resolve_processor_target()` When `DagFileProcessorProcess.start()` needs to parse a file: @@ -66,6 +68,39 @@ _resolve_processor_target(path, bundle_name, bundle_path) The first coordinator whose `can_handle_dag_file()` returns `True` wins. If none match, the default Python `_parse_file_entrypoint` runs. +### Transport: Why msgpack over TCP Loopback + +A natural reviewer question is "why a custom-looking framed-msgpack protocol over `127.0.0.1:<port>`, and not Unix sockets / gRPC / HTTP REST?" Two clarifications are important: + +1. **The protocol is not new for the Java SDK.** Length-prefixed msgpack frames are the existing transport between the Airflow supervisor and the Python task runner (see `task-sdk/src/airflow/sdk/execution_time/supervisor.py` and `comms.py`).
The coordinator bridge wires the language-runtime sockets onto that same byte stream — it does not define a new wire format. Migrating it would be a separate, pan-SDK change. +2. **Forward-compat for IPC messages is treated as a contract**, not as a transport choice. The decoder rules that all SDKs must follow are stated in [ADR-0003 — IPC Forward-Compatibility Contract](0003-workload-execution.md#ipc-forward-compatibility-contract). + +#### Alternatives considered + +| Option | Why not (today) | +|---|---| +| **Unix domain sockets** instead of TCP loopback | Avoids the IPv6/dual-stack concern with `127.0.0.1`, and matches conventions like Docker's `/var/run/docker.sock`. Worth revisiting once a formal IPC AIP lands; not adopted now because it would diverge from the existing Python supervisor transport, which is also TCP loopback. | +| **gRPC / Protocol Buffers** | Would require defining an intermediate IDL for `DagFileParseRequest`, `StartupDetails`, etc. The internal serialization that the language runtime returns (DagSerialization v3) is *not* expressible as a flat ProtoBuf without losing information — see "Cross-SDK serialization compatibility" below. gRPC would replace one custom-looking layer with two: ProtoBuf for transport plus a separate JSON-shaped DAG payload nested inside it. | +| **HTTP REST** | Adds an HTTP server in every language runtime and an HTTP client in the supervisor for a strictly local, single-peer connection. None of HTTP's value (intermediaries, caching, content negotiation) applies. The Java SDK's `Supervisor.kt` already does HTTP for the *Execution API* (Edge-worker path); the comm channel between supervisor and language runtime is intentionally lower-level. | +| **Keep msgpack-over-TCP** (chosen) | Reuses the existing supervisor transport unchanged; the bridge is a pure byte forwarder. New language SDKs only need a length-prefixed-msgpack codec, which exists in every target language. 
| + +A formal AIP for the supervisor-to-runtime comm protocol is expected as a follow-up once two or more language SDKs (Java, Go) are in tree; that AIP is the natural place to revisit transport and framing. + +### Cross-SDK Serialization Compatibility + +The `DagFileParsingResult` payload that a language runtime returns is the *Airflow internal* serialized DAG format, not an SDK-defined schema. The authoritative reference is `airflow-core/src/airflow/serialization/schema.json`, which describes `LazyDeserializedDAG` (see `airflow-core/src/airflow/dag_processing/processor.py` and `airflow-core/src/airflow/serialization/serialized_objects.py`). The scheduler reads this format directly into its internal model — any divergence is a parsing failure. + +**Why a per-language reimplementation rather than codegen?** The first attempt was to generate POJOs from `schema.json` (similar to how Pydantic models are generated from OpenAPI specs). That approach was abandoned because the generated types miss the wrapping/unwrapping rules that distinguish "decorated" fields (kept as `{"__type", "__var"}`) from "non-decorated" fields (unwrapped to the bare value), as well as the timetable/task encoding rules listed below. Wiring an extra translation layer on top of generated types added more code than implementing the serializer directly per language. + +**Compatibility strategy.** Each language SDK ships its own serializer plus a cross-SDK validator: + +- A shared `test_dags.yaml` defines logical fixtures. +- Python emits `serialized_python.json` via `DagSerialization.serialize_dag()`. +- Each language SDK emits `serialized_<lang>.json` via its own serializer. +- `compare.py` does a field-by-field comparison and fails on divergence. + +This validator is planned to run as a CI gate (PR #65959).
A complementary direction (suggested by reviewers, deferred): publish JSON schemas for the IPC envelope types themselves (`DagFileParsingResult`, `StartupDetails`, `TaskInstance`), which are currently undocumented because they were Python-to-Python only. That work is out of scope for the Java SDK PR but is a sensible next step once a second language SDK is in tree. + ### What the Base Class Handles Automatically The matched coordinator's `run_dag_parsing()` (a concrete method on `BaseCoordinator`) delegates to `_runtime_subprocess_entrypoint()`, which handles all the TCP/process plumbing: diff --git a/java-sdk/adr/0003-workload-execution.md b/java-sdk/adr/0003-workload-execution.md index 5f518705622fd..027fb7a0ed6a4 100644 --- a/java-sdk/adr/0003-workload-execution.md +++ b/java-sdk/adr/0003-workload-execution.md @@ -200,6 +200,50 @@ The task execution follows a synchronous request/response pattern from the runti 4. This repeats for each Airflow service call the task code makes 5. When the task finishes, the runtime sends a terminal message (`SucceedTask` or `TaskState`) — no response is expected, and the process exits +### IPC Forward-Compatibility Contract + +The supervisor-to-runtime IPC schema (the messages enumerated above plus `StartupDetails` and `DagFileParseRequest` from [ADR-0002](0002-dag-parsing.md)) is shared between Airflow Core (Python) and every language SDK. A formal AIP for this protocol is expected as follow-up work; until then, this section pins down the rules that the Java SDK assumes and that any future SDK (Go, Rust, …) must follow. + +**Codec rule (load-bearing).** Every SDK MUST configure its decoder to ignore unknown fields: + +- Python side: `msgspec` / Pydantic models are forward-compatible by default. +- Java side: `TaskSdkFrames.kt` configures the Jackson `ObjectMapper` with `FAIL_ON_UNKNOWN_PROPERTIES = false`. 
A short comment at that call site documents that this is contract, not preference — flipping it back to the Jackson default would break forward compatibility with Core. +- Any new SDK: pick a codec configuration that mirrors this (silent drop of unknown fields). + +This rule is what makes additive Core changes safe to ship without bumping a version on every SDK. The analogous trap — generated clients that emit their *own* allowlist check before the configured mapper sees the bytes — has bitten downstream Java consumers in unrelated systems; flagging the contract here makes it visible to future SDK authors. + +**Change classification.** + +| Change to a message | Status | Required action | +|---|---|---| +| Add a new optional field | **Non-breaking.** Decoders ignore it; old SDKs unaffected. | None. Just ship it. | +| Add a new required field | Breaking. | Deprecation cycle: ship as optional first, populate from Core, wait for SDKs to consume it, then tighten. | +| Rename a field | Breaking. | Deprecation cycle: emit both names from Core during transition. | +| Change a field's type | Breaking. | Deprecation cycle, typically via a new field name + parallel emission. | +| Remove a required field | Breaking. **Especially dangerous in Java**: `lateinit var` properties on `StartupDetails` deserialize silently and only throw `UninitializedPropertyAccessException` on first access, so the failure surfaces inside user task code rather than at the protocol boundary. | Deprecation cycle. Prefer making the field optional first, then remove after a release in which all SDKs have absorbed the change. | + +**Recommended testing.** A small contract test on the SDK side should feed the decoder synthetic frames that exercise the rules above — an unknown field, a missing optional field, a `null` in an optional position — so that a future codec-config regression is caught before it reaches users. 
`SerializationCompatibilityTest` already covers DAG-payload divergence (see [ADR-0002 — Cross-SDK Serialization Compatibility](0002-dag-parsing.md#cross-sdk-serialization-compatibility)); the IPC-envelope tests are complementary and currently in the follow-up bucket. + +### Runtime Lifecycle and Worker Capability + +The language runtime is **ephemeral and one-process-per-task**: + +- Each task instance launches its own `java -classpath <jar_dir>/* <MainClass> --comm=… --logs=…` (or the equivalent for another language). The lifetime of that process is the lifetime of the task. There is no pooling or warm-pool reuse. +- Parallelism on a single worker therefore equals the number of concurrently running task processes. Five concurrent Java tasks on one worker means five JVMs. +- DAG parsing has the same shape: each `DagFileProcessorProcess` child handles one parse request and exits. The language runtime spawned underneath it inherits that ephemerality. + +**Worker capability is opt-in.** A worker can run a non-Python task only if the corresponding `apache-airflow-providers-sdk-<lang>` provider is installed and the language toolchain (e.g., a JRE) is on the host. There is no requirement that every worker support every language. Routing relies on: + +| Layer | Mechanism | +|---|---| +| Author intent | Operator / `@task.stub` declares `queue="java"` (or any custom queue) | +| Worker selection | The executor (Celery, Kubernetes, etc.) routes the task to a worker that consumes that queue, exactly as it does for Python tasks today | +| Runtime selection | Inside the task runner, `[sdk] queue_to_sdk` maps the queue name to the coordinator's `sdk` value (`"java"`); `_resolve_runtime_entrypoint` then dispatches into `JavaCoordinator.run_task_execution` | + +The deployment model is the same one that already applies to Python providers: install what your DAGs need, on the hosts they run on. Multi-language workers are possible (install both providers and both toolchains) but not required.
+ +**JAR / artifact version compatibility.** The Java SDK embeds its version in the bundle JAR via the `Airflow-Java-SDK-Version` manifest attribute (see [ADR-0004](0004-pure-java-dags.md)). Validating that a bundle's SDK version matches the installed `JavaCoordinator` version at execution time is planned but not yet wired in; this is a follow-up to add before promoting the SDK out of preview. + ### StartupDetails The first message the runtime receives is `StartupDetails`, which provides full context for the task: diff --git a/java-sdk/adr/0004-pure-java-dags.md b/java-sdk/adr/0004-pure-java-dags.md index 9ca153be2d7c4..c55bf5505630b 100644 --- a/java-sdk/adr/0004-pure-java-dags.md +++ b/java-sdk/adr/0004-pure-java-dags.md @@ -223,6 +223,22 @@ The `main()` method is the JVM entry point that the coordinator launches. It wir > **Note:** The current `BundleBuilder` interface is subject to review before the SDK reaches 1.0. Subclassing `Dag` directly may be a more natural fit and is being considered for post-OSS-integration. +### Deployment and Updates + +A reasonable concern about JAR-based DAGs is whether updating a bundle requires draining or restarting the DAG processor / workers — Python source files are flexible because everything is read fresh on each parse, but a long-lived JVM holding a JAR open could pin an old version. + +The design avoids this by leaning on the same ephemerality that Python uses: + +- **DAG processor.** `DagFileProcessorManager` is long-lived, but each `DagFileProcessorProcess` child is one-shot and exits after returning a `DagFileParsingResult`. The Java runtime spawned underneath it (`java -classpath <bundle_path>/* …`) shares that lifetime — it loads the JAR fresh on every parse, then exits. Replacing the JAR on disk takes effect on the next scheduled parse with no manager restart.
+- **Workers.** Each task instance launches its own JVM ([ADR-0003 — Runtime Lifecycle and Worker Capability](0003-workload-execution.md#runtime-lifecycle-and-worker-capability)). The classloader is process-scoped; a swapped JAR is picked up the next time a task starts. There is no warm JVM pool to invalidate. + +In practice, "updating a Java DAG bundle" is the same shape as "updating a Python DAG file": drop the new file (or directory of JARs) into the bundle location and let normal scheduling pick it up. The version that runs a given task instance is determined at task start, not at worker start. + +Two operational details worth flagging: + +- **Atomic swap.** Writing a JAR in place while a task happens to be loading it can yield a corrupted read. Operators should prefer the standard "write to a temp name, rename into place" pattern, which the file system handles atomically on POSIX. This is the same guidance that already applies to Python file-system bundles. +- **Mid-run version skew.** Because the version is resolved per task launch, a long-running DAG run can in principle observe one bundle version for an upstream task and a different version for a downstream task if a swap happens between them. Bundle-version validation against `Airflow-Java-SDK-Bundle-Version` (planned — distinct from `Airflow-Java-SDK-Version`, which identifies the SDK toolkit; see [ADR-0003](0003-workload-execution.md#runtime-lifecycle-and-worker-capability)) gives operators a way to detect skew if it matters; the data-plane consequences (XCom shape changes, etc.) are the bundle author's responsibility, exactly as with Python. + ## Consequences - JAR bundles are self-contained: metadata, source, and compiled code are all in one artifact, simplifying deployment (copy one directory of JARs). 
diff --git a/java-sdk/adr/0005-coordinator-packaging.md b/java-sdk/adr/0005-coordinator-packaging.md new file mode 100644 index 0000000000000..18f8080784046 --- /dev/null +++ b/java-sdk/adr/0005-coordinator-packaging.md @@ -0,0 +1,80 @@ + + +# ADR-0005: Coordinator Packaging, Module Layout, and Registration + +## Status + +Proposed — open for discussion. The packaging shipped with the initial Java SDK PRs (`apache-airflow-providers-sdk-java` under `providers/sdk/java/`, registered through `ProvidersManager`) is the *starting point*; this ADR enumerates the alternatives raised on PR #65958 so they can be decided before a second language SDK lands. + +## Context + +[ADR-0001](0001-java-sdk-airflow-integration.md) introduces a coordinator extension point and ships the Java implementation as an Airflow provider. Reviewers on PR #65958 raised three related but separable questions: + +1. **PyPI package name.** Should the Java coordinator ship as `apache-airflow-providers-sdk-java` (consistent with every other provider) or as `apache-airflow-coordinator-java` (recognizing that "language coordinator" is a structurally new kind of distribution that does not behave like operators/hooks/sensors)? +2. **Source-tree module layout.** Should it live under `providers/sdk/java/` alongside other providers, under a nested `providers/coordinators/java/`, or at a new top-level `coordinators/` directory peer to `providers/`, `airflow-core/`, and `task-sdk/`? +3. **Discovery / registration mechanism.** Should coordinator classes be discovered through the existing `ProvidersManager` (and its task-runtime equivalent `ProvidersManagerTaskRuntime`), or through a dedicated `CoordinatorManager` (likely living in a `_shared` library because both Airflow Core and Task SDK need to consume it)? 
+ +A related concern raised separately on the same PR is **discoverability and user confusion**: providers appear in the Airflow registry / docs, so "`apache-airflow-providers-sdk-java` exists but `apache-airflow-providers-sdk-go` does not" is visible to end users today (the Go SDK currently ships through Edge-Worker, not as a coordinator). The naming choice affects how prominent that asymmetry is, but it is a **transitional** problem: once Go-SDK migrates to the coordinator interface (planned for 3.3), the asymmetry disappears regardless of which name is chosen. + +## Decision (provisional) + +**Adopted for the initial PRs:** option **A1** for naming, **B1** for layout, **C1** for registration — i.e., `apache-airflow-providers-sdk-java` under `providers/sdk/java/`, registered via `ProvidersManager`. This is the path of least resistance for landing the SDK and unblocks downstream PRs. + +**Open to revisit before a second language SDK lands** (Go-SDK migration): the options below. + +### A. PyPI package name + +| Option | Name | Argument for | Argument against | +|---|---|---|---| +| **A1** *(current)* | `apache-airflow-providers-sdk-java` | Consistent with the existing provider taxonomy; no new release machinery; user-installation muscle memory (`pip install apache-airflow-providers-…`) carries over. | A coordinator does not expose operators / hooks / sensors / triggers; calling it a "provider" stretches the term. | +| **A2** | `apache-airflow-coordinator-java` | Names the component for what it is — a runtime/coordinator plugin, structurally distinct from a normal provider. Marks it as a new distribution type early, before precedent calcifies (`fab` is the cautionary example reviewers cited). | New distribution type means new release docs, new constraints handling, possibly new versioning conventions vs Airflow Core. Unfamiliar `pip install` shape for users. 
| +| **A3** | `apache-airflow-sdk-java` | Cleanest from a user perspective — "I'm authoring DAGs in language X, I install the language-X SDK." | Conflicts with the in-tree Python `task-sdk` naming; ambiguous whether the package contains the *user-facing* SDK or the *coordinator* glue. | + +### B. Source-tree layout + +| Option | Path | Argument for | Argument against | +|---|---|---|---| +| **B1** *(current)* | `providers/sdk/java/` | Already in the providers monorepo conventions; no new top-level directory; ProvidersManager already scans `providers/`. | Visually lumps coordinators together with op/hook/sensor providers. | +| **B2** | `providers/coordinators/java/` | Keeps coordinators inside `providers/` (so existing tooling still finds them) but groups them as a sub-category, signaling that they are not normal providers. | Slight tooling change (provider discovery would need to recurse into the coordinator subtree). Still inherits "this is a provider" framing. | +| **B3** | `coordinators/` (new top-level peer to `airflow-core/`, `task-sdk/`, `providers/`) | Strongest separation; matches the A2 naming. Easier to apply different release / docs rules. | New top-level directory, new uv workspace member, new CI matrix entry. Bigger change for a still-debated decision. | + +### C. Discovery / registration + +| Option | Mechanism | Argument for | Argument against | +|---|---|---|---| +| **C1** *(current)* | Existing `ProvidersManager` (`airflow-core`) and `ProvidersManagerTaskRuntime` (`task-sdk`) discover the `coordinators` key in `provider.yaml`. | Reuses the discovery pipeline already present in both Core and Task SDK. Zero new infrastructure. | Conceptually couples coordinators to providers even if A2/B3 are picked. | +| **C2** | New `CoordinatorManager`, likely in a shared library (`shared/coordinator-manager/`) so both Core and Task SDK can consume it without import cycles. 
Coordinators self-register through this manager (e.g., entry-points group `airflow.coordinators` rather than `provider.yaml`). | Decouples coordinator discovery from provider discovery; cleaner separation aligned with A2/B3. Allows coordinator-specific lifecycle hooks (init, version-handshake, capability advertisement) without adding them to `ProvidersManager`. | New shared distribution + its symlink wiring; migration of the Java SDK off `provider.yaml` registration; doubled discovery code paths during transition. | + +## Recommendation + +The three axes are correlated but not strictly tied. Three internally consistent combinations are: + +- **Conservative, ship-now:** A1 + B1 + C1 (current state). Lowest risk, lowest change footprint, accepts that "provider" is a slight term-of-art stretch. +- **Aligned-rename:** A2 + B2 + C1. Renames the distribution to `apache-airflow-coordinator-java`, keeps the source under `providers/coordinators/<lang>/`, reuses the discovery infrastructure. This is the cheapest option that clearly *signals* the new component type without re-platforming discovery. +- **Full split:** A2 + B3 + C2. Cleanest end state for when there are multiple language coordinators plus possibly future static-source parsers (cf. the YAML / dag-factory discussion in [ADR-0001](0001-java-sdk-airflow-integration.md#coordinator-interface-subprocess-based-by-design)), but the highest churn and the option that would benefit most from being decided on the dev list as part of a follow-up AIP rather than during the Java-SDK PR review. + +The current choice (A1+B1+C1) is intended to be reversible: renaming a not-yet-released distribution and moving its source tree are both mechanical changes. The decision to defer is itself a choice — reviewers who want a different end state should call it out before the SDK ships in `3.3`, not after.
+ +## Consequences + +- The Java SDK ships under provider naming/layout in 3.3-preview; if the project later picks A2/B2/B3/C2, the rename becomes a single-PR refactor with deprecation shims (or a hard rename if the SDK is still in preview and we can break unreleased import paths). +- The Go-SDK migration to the coordinator interface (planned for 3.3) is the natural forcing function for a final decision: a second language SDK lands, and the cost of disagreement compounds. +- `ProvidersManager` accumulates an extra extension point (`coordinators`) that is not really an Airflow "provider" responsibility. This is acceptable as a transitional state but is the strongest argument for option C2 over time. +- Documentation must be explicit that "Java appears in the provider list, Go does not" is a transitional quirk, not a stable property. Whichever naming option is picked, release notes for 3.3 should call out the answer for both languages together. From 26247c720c784e11130a562facd2dfa0983d3ce6 Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU Date: Wed, 6 May 2026 16:55:23 +0800 Subject: [PATCH 14/16] Move sdk.java out of provider as coordinator-java distribution - Updated the Java SDK Airflow integration documentation (ADR-0001) to clarify the role of the Coordinator layer, including the new structure for coordinator instances and their registration in Airflow configuration. - Revised the DAG parsing documentation (ADR-0002) to reflect the shift from provider-based registration to instance-based configuration for coordinators. - Enhanced the workload execution documentation (ADR-0003) to detail the task execution process and the opt-in nature of worker capabilities for different language runtimes. - Established a new packaging and registration model for coordinators (ADR-0005), distinguishing them from traditional Airflow providers and introducing a namespace package for language-specific coordinators. 
- Updated configuration examples to demonstrate the new `[sdk] coordinators` and `queue_to_coordinator` settings, allowing for multiple instances of the same coordinator with different runtime configurations. --- .../adr/0001-java-sdk-airflow-integration.md | 139 ++++++++++++------ java-sdk/adr/0002-dag-parsing.md | 59 +++++--- java-sdk/adr/0003-workload-execution.md | 72 +++++---- java-sdk/adr/0005-coordinator-packaging.md | 117 ++++++++++----- 4 files changed, 246 insertions(+), 141 deletions(-) diff --git a/java-sdk/adr/0001-java-sdk-airflow-integration.md b/java-sdk/adr/0001-java-sdk-airflow-integration.md index 9dbb3bcb7e84e..f5735b0d7b55a 100644 --- a/java-sdk/adr/0001-java-sdk-airflow-integration.md +++ b/java-sdk/adr/0001-java-sdk-airflow-integration.md @@ -134,7 +134,9 @@ A complementary, **out-of-scope** future direction is parsing static (non-progra We introduce a **Coordinator** layer. When a DAG bundle is loaded, it not only tells Airflow how to find the DAGs (and tasks in them), but also how to *run* each task. Current Python tasks use a Python code path that runs them by forking. A new **Java Coordinator** instructs the task runner how to run tasks in JAR files. -The base interface (`BaseCoordinator`) lives in `airflow.sdk.execution_time` and is selected automatically via `ProvidersManagerTaskRuntime`. The Java Coordinator lives in a provider under the `airflow.providers.sdk.java` namespace, and new language coordinators follow the same pattern. +The base interface (`BaseCoordinator`) lives in `airflow.sdk.execution_time`. Concrete coordinators ship as standalone distributions — **not** as Airflow providers — under the shared `airflow.sdk.coordinators` namespace package. The Java coordinator ships as `apache-airflow-coordinators-java` and resolves to `airflow.sdk.coordinators.java.JavaCoordinator`. New language coordinators follow the same pattern: `apache-airflow-coordinators-<lang>` → `airflow.sdk.coordinators.<lang>.<Lang>Coordinator`.
+ +Coordinators are instantiated from the `[sdk] coordinators` Airflow configuration (see [Coordinator Registration](#coordinator-registration) below). Both Airflow Core (DAG processor) and Task SDK (task runner) read that config and use `import_string()` to load the configured `classpath` — no provider plumbing is involved. Decoupling coordinators from the provider system is the direction agreed in [ADR-0005](0005-coordinator-packaging.md) and tracked in [apache/airflow#66451](https://github.com/apache/airflow/issues/66451), which also motivates the per-instance `kwargs` (multiple JDK versions, JVM flags, etc.) that a class-only registration could not express. ### Architecture Overview @@ -190,50 +192,54 @@ The base interface (`BaseCoordinator`) lives in `airflow.sdk.execution_time` and │ Task Runner │ │ │ │ │ │ QueueToCoordinatorMapper │ │ - │ maps queue via `[sdk] │ │ - │ queue_to_sdk` config ───────┼───────────────────────────────────┘ - │ to matching coordinator │ + │ resolves queue via `[sdk] │ │ + │ queue_to_coordinator` ──────┼───────────────────────────────────┘ + │ to a coordinator instance │ + │ from `[sdk] coordinators` │ └──────────────────────────────┘ ``` ### The `BaseCoordinator` Interface -This is the central abstraction that language providers implement. It lives in the Task SDK (`task-sdk/src/airflow/sdk/execution_time/coordinator.py`) and handles both DAG parsing and task execution for a specific language runtime. +This is the central abstraction that language SDKs implement. It lives in the Task SDK (`task-sdk/src/airflow/sdk/execution_time/coordinator.py`) and handles both DAG parsing and task execution for a specific language runtime. ```python class BaseCoordinator: """ Base coordinator for runtime-specific DAG file processing and task execution. - Providers register subclasses in their ``provider.yaml`` under - ``coordinators``. 
Both ProvidersManager (airflow-core) and - ProvidersManagerTaskRuntime (task-sdk) discover coordinators through - this extension point. + Subclasses represent a specific language runtime (Java, Go, etc.) and are + instantiated by Airflow Core (DAG processor) and Task SDK (task runner) + from the ``[sdk] coordinators`` Airflow configuration. Each entry in that + config carries an instance ``name``, an importable ``classpath``, and + free-form ``kwargs`` that the subclass accepts in ``__init__`` — this is + how operators express runtime variants (multiple JDK versions, custom JVM + flags, etc.) without needing one subclass per variant. - Subclasses represent a specific language runtime (Java, Go, etc.) and - implement three methods. The base class owns the full bridge lifecycle: - TCP servers, subprocess management, selector-based I/O loop, and cleanup. + The base class owns the full bridge lifecycle: TCP servers, subprocess + management, selector-based I/O loop, and cleanup. """ - sdk: str # e.g. "java", "go" — matches sdk field on operator/TI + name: str # Instance name from [sdk] coordinators (e.g. "jdk-11", "jdk-17") + + def __init__(self, *, name: str, **kwargs) -> None: + """Accept the per-instance ``kwargs`` declared in ``[sdk] coordinators``.""" + ... # Discovery (called by DAG File Processor) - @classmethod - def can_handle_dag_file(cls, bundle_name: str, path: str | os.PathLike) -> bool: + def can_handle_dag_file(self, bundle_name: str, path: str | os.PathLike) -> bool: """Return True if this coordinator should parse the file at *path*.""" ... - @classmethod - def get_code_from_file(cls, fileloc: str) -> str: + def get_code_from_file(self, fileloc: str) -> str: """Return the actual DAG code (the content of JavaExampleBuilder.java in this case""" ... 
# DAG Parsing (called in forked DagFileProcessor child process) - @classmethod def dag_parsing_cmd( - cls, + self, *, dag_file_path: str, # Absolute path to DAG file bundle_name: str, # Name of the DAG bundle @@ -246,9 +252,8 @@ class BaseCoordinator: # Task Execution (called in forked worker child process) - @classmethod def task_execution_cmd( - cls, + self, *, what: TaskInstance, dag_rel_path: str | os.PathLike, # Relative path to DAG file within bundle @@ -261,24 +266,44 @@ class BaseCoordinator: # Lifecycle (owned by base class, not overridden) - @classmethod - def run_dag_parsing(cls, *, path, bundle_name, bundle_path) -> None: ... + def run_dag_parsing(self, *, path, bundle_name, bundle_path) -> None: ... - @classmethod - def run_task_execution(cls, *, what, dag_rel_path, bundle_info, startup_details) -> None: ... + def run_task_execution(self, *, what, dag_rel_path, bundle_info, startup_details) -> None: ... ``` -### Provider Registration +### Coordinator Registration + +Coordinators are registered through Airflow configuration, not through `provider.yaml` or any provider-discovery mechanism. The Java coordinator ships as the standalone distribution `apache-airflow-coordinators-java`, which contributes the `airflow.sdk.coordinators.java` subpackage to the namespace package owned by the Task SDK. As long as the distribution is on `PYTHONPATH`, both Airflow Core and the Task SDK can resolve `airflow.sdk.coordinators.java.JavaCoordinator` via `import_string()`. 
+ +Operators wire concrete instances in `airflow.cfg`: -Language providers register their coordinators in `provider.yaml`: +```ini +[sdk] +coordinators = [ + { + "name": "jdk-11", + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": { + "java_executable": "/usr/lib/jvm/java-11-openjdk-amd64/bin/java", + "jvm_args": ["-Xmx512m"], + "jdk_home": "/usr/lib/jvm/java-11-openjdk-amd64" + } + }, + { + "name": "jdk-17", + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": { + "java_executable": "/usr/lib/jvm/java-17-openjdk-amd64/bin/java", + "jvm_args": ["-Xmx1024m", "-Xms256m"], + "jdk_home": "/usr/lib/jvm/java-17-openjdk-amd64" + } + } +] -```yaml -# providers/sdk/java/provider.yaml -process-coordinators: - - airflow.providers.sdk.java.coordinator.JavaCoordinator +queue_to_coordinator = {"legacy-java-queue": "jdk-11", "modern-java-queue": "jdk-17"} ``` -> **Open question:** the package name, module path, and registration mechanism for coordinator providers (`apache-airflow-providers-sdk-java` vs `apache-airflow-coordinator-java`, `ProvidersManager` vs a dedicated `CoordinatorManager`) is being tracked separately in [ADR-0005](0005-coordinator-packaging.md). +The same `JavaCoordinator` class can back several instances with different runtime configuration; the routing key is the instance `name`, not the class. This shape is the resolution to the packaging and registration questions originally raised in [ADR-0005](0005-coordinator-packaging.md), motivated by [apache/airflow#66451](https://github.com/apache/airflow/issues/66451) (multi-JDK and JVM-flag support). ### Implementation Language: Kotlin (with a Java-First Public API) @@ -295,19 +320,31 @@ Because the user-facing API is Java, "Java SDK" remains the accurate name from a ### Example: `JavaCoordinator` ```python +# Shipped as ``apache-airflow-coordinators-java``; +# resolves to ``airflow.sdk.coordinators.java.JavaCoordinator``. 
class JavaCoordinator(BaseCoordinator): - sdk = "java" - - @classmethod - def can_handle_dag_file(cls, bundle_name, path): + def __init__( + self, + *, + name: str, + java_executable: str = "java", + jvm_args: list[str] | None = None, + jdk_home: str | None = None, + ) -> None: + self.name = name + self.java_executable = java_executable + self.jvm_args = list(jvm_args or []) + self.jdk_home = jdk_home + + def can_handle_dag_file(self, bundle_name, path): """True when path is a JAR with a Main-Class manifest entry.""" ... - @classmethod - def dag_parsing_cmd(cls, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): + def dag_parsing_cmd(self, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): main_class = find_main_class(Path(dag_file_path)) return [ - "java", + self.java_executable, + *self.jvm_args, "-classpath", f"{bundle_path}/*", main_class, @@ -315,12 +352,12 @@ class JavaCoordinator(BaseCoordinator): f"--logs={logs_addr}", ] - @classmethod - def task_execution_cmd(cls, *, what, dag_rel_path, bundle_info, comm_addr, logs_addr): + def task_execution_cmd(self, *, what, dag_rel_path, bundle_info, comm_addr, logs_addr): jar_path = Path(dag_rel_path) main_class = find_main_class(jar_path) return [ - "java", + self.java_executable, + *self.jvm_args, "-classpath", f"{jar_path.parent}/*", main_class, @@ -350,14 +387,17 @@ We have already added compatibility validation between the Python SDK and Java S **3. 
Execution API — Task Queues Routed to the Worker** -A new configuration is added to map each task's `queue` to a language runtime: +A new pair of configurations registers coordinator instances and maps each task's `queue` to one of them: ```ini [sdk] -queue_to_sdk = {"java": "java"} +coordinators = [ + {"name": "jdk-17", "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", "kwargs": {"java_executable": "java"}} +] +queue_to_coordinator = {"java": "jdk-17"} ``` -This specifies tasks in the `java` queue should be routed to `JavaCoordinator` since it has `sdk = "java"`. +Tasks scheduled to the `java` queue are routed to the coordinator instance named `jdk-17`. Multiple instances of the same class may coexist (e.g., `jdk-11` and `jdk-17`) and bind to different queues — see [Coordinator Registration](#coordinator-registration). ## Consequences @@ -366,14 +406,15 @@ This specifies tasks in the `java` queue should be routed to `JavaCoordinator` s | Component | New Interface | Change Type | |-----------|--------------|-------------| | `BaseCoordinator` | Abstract base defined in Task SDK | New class | -| `coordinators` | Provider extension point in `provider.yaml` | New extension point | +| `airflow.sdk.coordinators` | Namespace package contributed to by `apache-airflow-coordinators-` distributions | New namespace | | `@task.stub` decorator | `queue: str \| None` parameter | Additive | -| `[sdk] queue_to_sdk` | Airflow configuration | New option | -| `_resolve_runtime_entrypoint` | Route by `queue` → `sdk` match | Behavioral | +| `[sdk] coordinators` | Airflow configuration listing instances (`name`, `classpath`, `kwargs`) | New option | +| `[sdk] queue_to_coordinator` | Airflow configuration mapping queue → instance name | New option | +| `_resolve_runtime_entrypoint` | Route by `queue` → coordinator instance from `[sdk] coordinators` | Behavioral | ### What Becomes Easier -- Adding a new language runtime requires only a `BaseCoordinator` subclass, a language 
SDK, and a `provider.yaml` entry — no changes to Airflow Core. +- Adding a new language runtime requires only a `BaseCoordinator` subclass shipped as `apache-airflow-coordinators-` and a corresponding entry in `[sdk] coordinators` — no changes to Airflow Core and no provider plumbing. - DAG authors can mix Python and non-Python tasks in the same pipeline. - The existing task-runner two-layer design is preserved, keeping all Airflow extensions in Python. diff --git a/java-sdk/adr/0002-dag-parsing.md b/java-sdk/adr/0002-dag-parsing.md index b77070add87c1..5e69e4bb4a496 100644 --- a/java-sdk/adr/0002-dag-parsing.md +++ b/java-sdk/adr/0002-dag-parsing.md @@ -33,7 +33,7 @@ This ADR details the DAG parsing side of the coordinator architecture described ### Extension Point: `BaseCoordinator` -A single abstract base class — `BaseCoordinator` — handles both DAG parsing and task execution. It is registered in `provider.yaml` under `coordinators`. For DAG parsing, a subclass must implement two methods: +A single abstract base class — `BaseCoordinator` — handles both DAG parsing and task execution. Concrete subclasses ship as standalone distributions (`apache-airflow-coordinators-`) under the shared `airflow.sdk.coordinators` namespace package; they are **not** Airflow providers and are not registered through `provider.yaml`. For DAG parsing, a subclass must implement two methods: | Method | Signature | Responsibility | |---|---|---| @@ -42,16 +42,22 @@ A single abstract base class — `BaseCoordinator` — handles both DAG parsing ### Registration -In the provider's `provider.yaml`: +Coordinators are configured in `airflow.cfg` (see [ADR-0001 — Coordinator Registration](0001-java-sdk-airflow-integration.md#coordinator-registration)). Each entry names a coordinator instance, points at an importable class via `classpath`, and supplies per-instance `kwargs`: -```yaml -process-coordinators: - - airflow.providers.sdk..coordinator. 
+```ini +[sdk] +coordinators = [ + { + "name": "jdk-17", + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": {"java_executable": "/usr/lib/jvm/java-17/bin/java"} + } +] ``` -A single registration covers both DAG parsing and task execution — there are no separate `dag-file-processors` or `task-coordinators` keys. +A single instance entry covers both DAG parsing and task execution — there are no separate registries for the two roles. -**Per-host opt-in.** A coordinator becomes available on a given DAG processor host only when its provider is installed there. A deployment can run a Python-only DAG processor pool and a separate Java-capable DAG processor pool by simply *not* installing `apache-airflow-providers-sdk-java` on the Python-only hosts. The same applies to workers ([ADR-0003](0003-workload-execution.md)). There is no requirement that every parser carry a JDK; the coordinator extension point is opt-in by package install. +**Per-host opt-in.** A coordinator becomes available on a given DAG processor host only when its distribution is installed there *and* its instance appears in the host's `[sdk] coordinators`. A deployment can run a Python-only DAG processor pool and a separate Java-capable DAG processor pool by simply *not* installing `apache-airflow-coordinators-java` (or omitting the instance from config) on the Python-only hosts. The same applies to workers ([ADR-0003](0003-workload-execution.md)). There is no requirement that every parser carry a JDK; coordinators are opt-in per host by package install plus config entry. 
### Discovery: `_resolve_processor_target()` @@ -59,14 +65,15 @@ When `DagFileProcessorProcess.start()` needs to parse a file: ``` _resolve_processor_target(path, bundle_name, bundle_path) - for each class_path in ProvidersManager().coordinators: - coordinator_cls = import_string(class_path) - if coordinator_cls.can_handle_dag_file(bundle_name, path): - return functools.partial(coordinator_cls.run_dag_parsing, path=..., bundle_name=..., bundle_path=...) + for entry in conf.get("sdk", "coordinators"): + coordinator_cls = import_string(entry["classpath"]) + coordinator = coordinator_cls(name=entry["name"], **entry.get("kwargs", {})) + if coordinator.can_handle_dag_file(bundle_name, path): + return functools.partial(coordinator.run_dag_parsing, path=..., bundle_name=..., bundle_path=...) return None # fall back to default Python parser ``` -The first coordinator whose `can_handle_dag_file()` returns `True` wins. If none match, the default Python `_parse_file_entrypoint` runs. +The first coordinator instance whose `can_handle_dag_file()` returns `True` wins. If none match, the default Python `_parse_file_entrypoint` runs. Instances are constructed lazily from `[sdk] coordinators` and cached for the lifetime of the host process. ### Transport: Why msgpack over TCP Loopback @@ -122,7 +129,7 @@ Airflow Dag-Processor DagFileProcessorProcess.start(path, bundle_name, bundle_path) │ ├─ _resolve_processor_target() - │ └─ iterates process-coordinators from provider.yaml + │ └─ iterates instances from [sdk] coordinators (airflow.cfg) │ └─ first can_handle_dag_file() == True wins │ ▼ @@ -264,31 +271,35 @@ For DAG parsing, a new language provider needs: - Sends back a `DagFileParsingResult` msgpack frame - Exits -3. **Registration** in `provider.yaml` under `process-coordinators` +3. 
**Registration** as an entry in `[sdk] coordinators` in `airflow.cfg`, pointing `classpath` at the importable subclass under `airflow.sdk.coordinators.` ### Java as a Concrete Example **JavaCoordinator:** -The Java provider implements all DAG-parsing contracts in a single `BaseCoordinator` subclass: +The Java SDK implements all DAG-parsing contracts in a single `BaseCoordinator` subclass shipped as `apache-airflow-coordinators-java`: ```python -# providers/sdk/java/coordinator.py +# Distribution: apache-airflow-coordinators-java +# Module: airflow.sdk.coordinators.java.coordinator class JavaCoordinator(BaseCoordinator): - sdk = "java" + def __init__(self, *, name, java_executable="java", jvm_args=None, jdk_home=None): + self.name = name + self.java_executable = java_executable + self.jvm_args = list(jvm_args or []) + self.jdk_home = jdk_home - @classmethod - def can_handle_dag_file(cls, bundle_name, path) -> bool: + def can_handle_dag_file(self, bundle_name, path) -> bool: # Returns True when path is a JAR with a Main-Class manifest entry with contextlib.suppress(FileNotFoundError): return find_main_class(Path(path)) is not None return False - @classmethod - def dag_parsing_cmd(cls, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): + def dag_parsing_cmd(self, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): main_class = find_main_class(Path(dag_file_path)) return [ - "java", + self.java_executable, + *self.jvm_args, "-classpath", f"{bundle_path}/*", main_class, @@ -299,7 +310,7 @@ class JavaCoordinator(BaseCoordinator): `can_handle_dag_file()` checks that the file is a JAR with a `Main-Class` in its manifest. This ensures the coordinator only claims files it can actually handle. -The classpath is `/*` — a wildcard that includes all JARs in the directory (the application JAR plus its dependencies). +The classpath is `/*` — a wildcard that includes all JARs in the directory (the application JAR plus its dependencies). 
The `java_executable` and `jvm_args` come from the per-instance `kwargs` declared in `[sdk] coordinators`, so multiple instances (e.g., `jdk-11`, `jdk-17`) can launch different JVMs with different flags from the same class. No separate `JavaDagFileProcessor` class is needed — `BaseCoordinator` consolidates file detection, DAG parsing, and task execution into a single extension point. @@ -395,7 +406,7 @@ Both share test cases defined in `test_dags.yaml`, ensuring the Java SDK produce ## Consequences -- The DAG file processor can be extended to any language without modifying Airflow Core — only a provider with a `BaseCoordinator` subclass is needed. +- The DAG file processor can be extended to any language without modifying Airflow Core — only a `BaseCoordinator` subclass distributed as `apache-airflow-coordinators-` plus an entry in `[sdk] coordinators` is needed. - The language runtime must produce exact DagSerialization v3 JSON, requiring cross-language validation infrastructure (e.g., `test_dags.yaml` + `compare.py`). - The base class absorbs all TCP/process plumbing, so language providers only implement two methods for DAG parsing. - The subprocess bridge adds latency and a process boundary; DAG parsing for non-Python files is inherently slower than in-process Python parsing. diff --git a/java-sdk/adr/0003-workload-execution.md b/java-sdk/adr/0003-workload-execution.md index 027fb7a0ed6a4..d1c698cd44af2 100644 --- a/java-sdk/adr/0003-workload-execution.md +++ b/java-sdk/adr/0003-workload-execution.md @@ -33,7 +33,7 @@ This ADR details the task execution side of the coordinator architecture describ ### Extension Point: `BaseCoordinator` -The same `BaseCoordinator` base class that handles DAG parsing also handles task execution. It is registered in `provider.yaml` under `coordinators`. For task execution, a subclass must implement: +The same `BaseCoordinator` base class that handles DAG parsing also handles task execution. 
Concrete subclasses ship as standalone distributions (`apache-airflow-coordinators-`, contributing to the `airflow.sdk.coordinators` namespace package) and are activated through `[sdk] coordinators` in `airflow.cfg` — there is no `provider.yaml` involvement. For task execution, a subclass must implement: | Method | Signature | Responsibility | |---|---|---| @@ -52,11 +52,14 @@ The base class provides `run_task_execution()` as a concrete method that handles ### Registration -The same `coordinators` entry in `provider.yaml` covers both DAG parsing and task execution — no separate registration needed: +The same `[sdk] coordinators` entry covers both DAG parsing and task execution — no separate registration needed (see [ADR-0001 — Coordinator Registration](0001-java-sdk-airflow-integration.md#coordinator-registration)): -```yaml -coordinators: - - airflow.providers.sdk..coordinator. +```ini +[sdk] +coordinators = [ + {"name": "jdk-17", "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", "kwargs": {"java_executable": "/usr/lib/jvm/java-17/bin/java", "jvm_args": ["-Xmx1024m"]}} +] +queue_to_coordinator = {"java": "jdk-17"} ``` ### Discovery: `_resolve_runtime_entrypoint()` @@ -67,20 +70,20 @@ When `task_runner.main()` starts, before any Python task execution: task_runner.main() → startup_details = get_startup_details() # reads from fd 0 → _resolve_runtime_entrypoint(startup_details) - for each class_path in ProvidersManagerTaskRuntime().process_coordinators: - coordinator_cls = import_string(class_path) - if not hasattr(coordinator_cls, "run_task_execution"): - continue - return functools.partial(coordinator_cls.run_task_execution, - what=..., dag_rel_path=..., bundle_info=..., startup_details=...) 
- return None # fall back to default Python execution + coord_name = conf.get("sdk", "queue_to_coordinator").get(startup_details.ti.queue) + if coord_name is None: + return None # fall back to default Python execution + entry = next(e for e in conf.get("sdk", "coordinators") if e["name"] == coord_name) + coordinator = import_string(entry["classpath"])(name=coord_name, **entry.get("kwargs", {})) + return functools.partial(coordinator.run_task_execution, + what=..., dag_rel_path=..., bundle_info=..., startup_details=...) → if runtime_entrypoint is not None: runtime_entrypoint() # language-specific execution return # short-circuit — skip Python execution entirely ``` -> **Note:** Currently the first coordinator with `run_task_execution` wins. `QueueToCoordinatorMapper` maps the task's `queue` to the correct coordinator via the `[sdk] queue_to_sdk` configuration. +> **Note:** `QueueToCoordinatorMapper` resolves the task's `queue` against `[sdk] queue_to_coordinator` to pick the coordinator instance name, then looks that name up in `[sdk] coordinators` and instantiates the `classpath` with the entry's `kwargs`. Two queues mapped to two different instances of the same class (e.g., `jdk-11` and `jdk-17`) execute on different JVMs with different flags. ### Expected E2E Flow @@ -98,8 +101,8 @@ task_runner.main() ├─ get_startup_details() ← reads StartupDetails from fd 0 │ ├─ _resolve_runtime_entrypoint() - │ └─ iterates coordinators from provider.yaml - │ └─ first with run_task_execution wins + │ └─ resolves queue → instance name via [sdk] queue_to_coordinator + │ └─ instantiates the matching entry from [sdk] coordinators │ ▼ Coordinator.run_task_execution(what, dag_rel_path, bundle_info, startup_details) @@ -232,13 +235,13 @@ The language runtime is **ephemeral and one-process-per-task**: - Parallelism on a single worker therefore equals the number of concurrently running task processes. Five concurrent Java tasks on one worker means five JVMs. 
- DAG parsing has the same shape: each `DagFileProcessorProcess` child handles one parse request and exits. The language runtime spawned underneath it inherits that ephemerality. -**Worker capability is opt-in.** A worker can run a non-Python task only if the corresponding `apache-airflow-providers-sdk-` provider is installed and the language toolchain (e.g., a JRE) is on the host. There is no requirement that every worker support every language. Routing relies on: +**Worker capability is opt-in.** A worker can run a non-Python task only if the corresponding `apache-airflow-coordinators-` distribution is installed, the matching coordinator instance is declared in `[sdk] coordinators`, and the language toolchain (e.g., a JRE) is on the host. There is no requirement that every worker support every language. Routing relies on: | Layer | Mechanism | |---|---| | Author intent | Operator / `@task.stub` declares `queue="java"` (or any custom queue) | | Worker selection | The executor (Celery, Kubernetes, etc.) routes the task to a worker that consumes that queue, exactly as it does for Python tasks today | -| Runtime selection | Inside the task runner, `[sdk] queue_to_sdk` maps the queue name to the coordinator's `sdk` value (`"java"`); `_resolve_runtime_entrypoint` then dispatches into `JavaCoordinator.run_task_execution` | +| Runtime selection | Inside the task runner, `[sdk] queue_to_coordinator` maps the queue name to a coordinator instance name; that name is resolved against `[sdk] coordinators` to instantiate the configured class with its `kwargs`; `_resolve_runtime_entrypoint` then dispatches into `.run_task_execution` | The deployment model is the same one that already applies to Python providers: install what your DAGs need, on the hosts they run on. Multi-language workers are possible (install both providers and both toolchains) but not required. 
@@ -257,12 +260,13 @@ The first message the runtime receives is `StartupDetails`, which provides full | `ti_context` | `TIRunContext` | DAG run context (logical date, data interval, etc.) | | `sentry_integration` | string | Sentry DSN for error reporting (optional) | -### What a Language Provider Must Implement +### What a Language SDK Must Implement -For task execution, a new language provider needs: +For task execution, a new language SDK needs: 1. **A `BaseCoordinator` subclass** with: - - `task_execution_cmd()` — returns the command to launch the runtime + - An `__init__` that accepts the kwargs the operator will declare in `[sdk] coordinators` (e.g., interpreter path, language-specific runtime flags) + - `task_execution_cmd()` — returns the command to launch the runtime, typically using attributes set in `__init__` - (This is the same subclass that implements `can_handle_dag_file()` and `dag_parsing_cmd()` for DAG parsing — one class covers both) 2. **A runtime process** that: @@ -278,7 +282,7 @@ For task execution, a new language provider needs: 4. **A client API** that wraps the socket protocol behind a simple interface (get_connection, get_variable, get_xcom, set_xcom) so task authors don't deal with framing -5. **Registration** in `provider.yaml` under `coordinators` (same entry as DAG parsing — no separate registration) +5. 
**Distribution** as `apache-airflow-coordinators-`, contributing the subclass under `airflow.sdk.coordinators.` (same module path as the DAG-parsing entry — one class, one import path) ### Java as a Concrete Example @@ -287,21 +291,25 @@ For task execution, a new language provider needs: The same `JavaCoordinator` that handles DAG parsing also handles task execution — no separate `JavaTaskCoordinator` class is needed: ```python -# providers/sdk/java/coordinator.py +# Distribution: apache-airflow-coordinators-java +# Module: airflow.sdk.coordinators.java.coordinator class JavaCoordinator(BaseCoordinator): - sdk = "java" + def __init__(self, *, name, java_executable="java", jvm_args=None, jdk_home=None): + self.name = name + self.java_executable = java_executable + self.jvm_args = list(jvm_args or []) + self.jdk_home = jdk_home - @classmethod - def can_handle_dag_file(cls, bundle_name, path) -> bool: + def can_handle_dag_file(self, bundle_name, path) -> bool: with contextlib.suppress(FileNotFoundError): return find_main_class(Path(path)) is not None return False - @classmethod - def dag_parsing_cmd(cls, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): + def dag_parsing_cmd(self, *, dag_file_path, bundle_name, bundle_path, comm_addr, logs_addr): main_class = find_main_class(Path(dag_file_path)) return [ - "java", + self.java_executable, + *self.jvm_args, "-classpath", f"{bundle_path}/*", main_class, @@ -309,12 +317,12 @@ class JavaCoordinator(BaseCoordinator): f"--logs={logs_addr}", ] - @classmethod - def task_execution_cmd(cls, *, what, dag_rel_path, bundle_info, comm_addr, logs_addr): + def task_execution_cmd(self, *, what, dag_rel_path, bundle_info, comm_addr, logs_addr): jar_path = Path(dag_rel_path) main_class = find_main_class(jar_path) return [ - "java", + self.java_executable, + *self.jvm_args, "-classpath", f"{jar_path.parent}/*", main_class, @@ -323,7 +331,7 @@ class JavaCoordinator(BaseCoordinator): ] ``` -One class, one `provider.yaml` 
entry, covers both DAG parsing and task execution. +One class, one importable `classpath`, covers both DAG parsing and task execution. Operators register it once per JVM variant in `[sdk] coordinators` and route queues to those instances via `[sdk] queue_to_coordinator`. **Java SDK Task Interface:** diff --git a/java-sdk/adr/0005-coordinator-packaging.md b/java-sdk/adr/0005-coordinator-packaging.md index 18f8080784046..5015d5ce4fa1f 100644 --- a/java-sdk/adr/0005-coordinator-packaging.md +++ b/java-sdk/adr/0005-coordinator-packaging.md @@ -21,60 +21,105 @@ ## Status -Proposed — open for discussion. The packaging shipped with the initial Java SDK PRs (`apache-airflow-providers-sdk-java` under `providers/sdk/java/`, registered through `ProvidersManager`) is the *starting point*; this ADR enumerates the alternatives raised on PR #65958 so they can be decided before a second language SDK lands. +Accepted — coordinators are a new distribution type, **not** Airflow providers, and are activated through Airflow configuration rather than `provider.yaml`. Tracked operationally in [apache/airflow#66451](https://github.com/apache/airflow/issues/66451). ## Context -[ADR-0001](0001-java-sdk-airflow-integration.md) introduces a coordinator extension point and ships the Java implementation as an Airflow provider. Reviewers on PR #65958 raised three related but separable questions: +[ADR-0001](0001-java-sdk-airflow-integration.md) introduces a coordinator extension point. Reviewers on PR #65958 raised three related but separable questions: -1. **PyPI package name.** Should the Java coordinator ship as `apache-airflow-providers-sdk-java` (consistent with every other provider) or as `apache-airflow-coordinator-java` (recognizing that "language coordinator" is a structurally new kind of distribution that does not behave like operators/hooks/sensors)? -2. 
**Source-tree module layout.** Should it live under `providers/sdk/java/` alongside other providers, under a nested `providers/coordinators/java/`, or at a new top-level `coordinators/` directory peer to `providers/`, `airflow-core/`, and `task-sdk/`? -3. **Discovery / registration mechanism.** Should coordinator classes be discovered through the existing `ProvidersManager` (and its task-runtime equivalent `ProvidersManagerTaskRuntime`), or through a dedicated `CoordinatorManager` (likely living in a `_shared` library because both Airflow Core and Task SDK need to consume it)? +1. **PyPI package name.** Should the Java coordinator ship as `apache-airflow-providers-sdk-java` (consistent with every other provider) or as `apache-airflow-coordinators-java` (recognizing that "language coordinator" is a structurally new kind of distribution that does not behave like operators/hooks/sensors)? +2. **Source-tree module layout.** Should it live under `providers/sdk/java/` alongside other providers, or as a new top-level peer to `providers/`, `airflow-core/`, and `task-sdk/`? +3. **Discovery / registration mechanism.** Should coordinator classes be discovered through the existing `ProvidersManager` (and its task-runtime equivalent `ProvidersManagerTaskRuntime`), or through some other mechanism? -A related concern raised separately on the same PR is **discoverability and user confusion**: providers appear in the Airflow registry / docs, so "`apache-airflow-providers-sdk-java` exists but `apache-airflow-providers-sdk-go` does not" is visible to end users today (the Go SDK currently ships through Edge-Worker, not as a coordinator). The naming choice affects how prominent that asymmetry is, but it is a **transitional** problem: once Go-SDK migrates to the coordinator interface (planned for 3.3), the asymmetry disappears regardless of which name is chosen. 
+A second concern, raised separately, is **runtime configuration**: a single `JavaCoordinator` class is not enough to express "use JDK 11 for the legacy queue and JDK 17 for the modern queue, with different `-Xmx` values." Class-only registration forces operators to subclass for every variant or hardcode environment lookups, which the issue calls out explicitly: -## Decision (provisional) +> How can I use different JDK version? How can I use different JVM arguments? We hardcoded the subprocess cmd … so users have to subclass another Coordinator to override the Java config. +> — [apache/airflow#66451](https://github.com/apache/airflow/issues/66451) -**Adopted for the initial PRs:** option **A1** for naming, **B1** for layout, **C1** for registration — i.e., `apache-airflow-providers-sdk-java` under `providers/sdk/java/`, registered via `ProvidersManager`. This is the path of least resistance for landing the SDK and unblocks downstream PRs. +The existing `[sdk] queue_to_sdk` config (introduced in [ADR-0001](0001-java-sdk-airflow-integration.md)) maps a queue to a *language*, not to a *runtime variant*, and is therefore insufficient for this need. -**Open to revisit before a second language SDK lands** (Go-SDK migration): the options below. +## Decision -### A. PyPI package name +### A. Distribution name: `apache-airflow-coordinators-<lang>` -| Option | Name | Argument for | Argument against | -|---|---|---|---| -| **A1** *(current)* | `apache-airflow-providers-sdk-java` | Consistent with the existing provider taxonomy; no new release machinery; user-installation muscle memory (`pip install apache-airflow-providers-…`) carries over. | A coordinator does not expose operators / hooks / sensors / triggers; calling it a "provider" stretches the term. | -| **A2** | `apache-airflow-coordinator-java` | Names the component for what it is — a runtime/coordinator plugin, structurally distinct from a normal provider.
Marks it as a new distribution type early, before precedent calcifies (`fab` is the cautionary example reviewers cited). | New distribution type means new release docs, new constraints handling, possibly new versioning conventions vs Airflow Core. Unfamiliar `pip install` shape for users. | -| **A3** | `apache-airflow-sdk-java` | Cleanest from a user perspective — "I'm authoring DAGs in language X, I install the language-X SDK." | Conflicts with the in-tree Python `task-sdk` naming; ambiguous whether the package contains the *user-facing* SDK or the *coordinator* glue. | +Coordinators are not Airflow providers; they are a separate distribution type. The Java coordinator ships as **`apache-airflow-coordinators-java`**. New language coordinators follow the same pattern (`apache-airflow-coordinators-go`, `apache-airflow-coordinators-rust`, …). -### B. Source-tree layout +A coordinator distribution exposes: -| Option | Path | Argument for | Argument against | -|---|---|---|---| -| **B1** *(current)* | `providers/sdk/java/` | Already in the providers monorepo conventions; no new top-level directory; ProvidersManager already scans `providers/`. | Visually lumps coordinators together with op/hook/sensor providers. | -| **B2** | `providers/coordinators/java/` | Keeps coordinators inside `providers/` (so existing tooling still finds them) but groups them as a sub-category, signaling that they are not normal providers. | Slight tooling change (provider discovery would need to recurse into the coordinator subtree). Still inherits "this is a provider" framing. | -| **B3** | `coordinators/` (new top-level peer to `airflow-core/`, `task-sdk/`, `providers/`) | Strongest separation; matches the A2 naming. Easier to apply different release / docs rules. | New top-level directory, new uv workspace member, new CI matrix entry. Bigger change for a still-debated decision. | +- A `BaseCoordinator` subclass under `airflow.sdk.coordinators.<lang>`.
+- No operators, hooks, sensors, triggers, or `provider.yaml`. -### C. Discovery / registration +### B. Module layout: namespace package under `airflow.sdk.coordinators` -| Option | Mechanism | Argument for | Argument against | -|---|---|---|---| -| **C1** *(current)* | Existing `ProvidersManager` (`airflow-core`) and `ProvidersManagerTaskRuntime` (`task-sdk`) discover the `coordinators` key in `provider.yaml`. | Reuses the discovery pipeline already present in both Core and Task SDK. Zero new infrastructure. | Conceptually couples coordinators to providers even if A2/B3 are picked. | -| **C2** | New `CoordinatorManager`, likely in a shared library (`shared/coordinator-manager/`) so both Core and Task SDK can consume it without import cycles. Coordinators self-register through this manager (e.g., entry-points group `airflow.coordinators` rather than `provider.yaml`). | Decouples coordinator discovery from provider discovery; cleaner separation aligned with A2/B3. Allows coordinator-specific lifecycle hooks (init, version-handshake, capability advertisement) without adding them to `ProvidersManager`. | New shared distribution + its symlink wiring; migration of the Java SDK off `provider.yaml` registration; doubled discovery code paths during transition. | +Each coordinator distribution contributes a subpackage to the **namespace package** `airflow.sdk.coordinators`. The Task SDK owns the namespace; concrete coordinator distributions add `airflow.sdk.coordinators.<lang>`. -## Recommendation +The Java coordinator therefore resolves as: -The three axes are correlated but not strictly tied. A reasonable consistent set is: +```python +from airflow.utils.module_loading import import_string -- **Conservative, ship-now:** A1 + B1 + C1 (current state). Lowest risk, lowest change footprint, accepts that "provider" is a slight term-of-art stretch. -- **Aligned-rename:** A2 + B2 + C1.
Renames the distribution to `apache-airflow-coordinator-java`, keeps the source under `providers/coordinators//`, reuses the discovery infrastructure. This is the cheapest option that clearly *signals* the new component type without re-platforming discovery. -- **Full split:** A2 + B3 + C2. Cleanest end state for when there are multiple language coordinators plus possibly future static-source parsers (cf. the YAML / dag-factory discussion in [ADR-0001](0001-java-sdk-airflow-integration.md#coordinator-interface-subprocess-based-by-design)), but the highest churn and the option that would benefit most from being decided on the dev list as part of a follow-up AIP rather than during the Java-SDK PR review. +JavaCoordinator = import_string("airflow.sdk.coordinators.java.JavaCoordinator") +``` -The current choice (A1+B1+C1) is intended to be reversible: renaming a not-yet-released distribution and moving its source tree are both mechanical changes. The decision to defer is itself a choice — reviewers who want a different end state should call it out before the SDK ships in `3.3`, not after. +Both Airflow Core (DAG processor) and the Task SDK (task runner) import coordinators by this path. As long as `apache-airflow-coordinators-java` is installed on a host, that `import_string` call resolves correctly without any registry lookup. + +### C. Discovery via `[sdk] coordinators` (Airflow configuration) + +Coordinators are **not** discovered through `ProvidersManager` / `ProvidersManagerTaskRuntime`, and there is no `coordinators` key in `provider.yaml`. 
They are registered as instance entries in `airflow.cfg`: + +```ini +[sdk] +coordinators = [ + { + "name": "jdk-11", + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": { + "java_executable": "/usr/lib/jvm/java-11-openjdk-amd64/bin/java", + "jvm_args": ["-Xmx512m"], + "jdk_home": "/usr/lib/jvm/java-11-openjdk-amd64" + } + }, + { + "name": "jdk-17", + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": { + "java_executable": "/usr/lib/jvm/java-17-openjdk-amd64/bin/java", + "jvm_args": ["-Xmx1024m", "-Xms256m"], + "jdk_home": "/usr/lib/jvm/java-17-openjdk-amd64" + } + } +] + +queue_to_coordinator = {"legacy-java-queue": "jdk-11", "modern-java-queue": "jdk-17"} +``` + +The shape is intentionally similar to `AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST`: a list of self-describing entries with `name`, `classpath`, and free-form `kwargs`. + +**Renames vs ADR-0001's earlier draft:** + +| Old (`[sdk] queue_to_sdk`) | New (`[sdk] queue_to_coordinator`) | +|---|---| +| Maps queue → language tag (e.g., `"java"`) | Maps queue → coordinator instance name (e.g., `"jdk-17"`) | +| One coordinator per language | Many coordinator instances per language, distinguished by `kwargs` | + +`queue_to_coordinator` replaces `queue_to_sdk` everywhere. + +### Why not `provider.yaml` / `ProvidersManager`? + +Coordinators are not providers in the Airflow sense: + +- They expose no operators / hooks / sensors / triggers. +- They are consumed by both Airflow Core (in the DAG processor) **and** the Task SDK (in the task runner). The provider system is not designed to be loaded from inside a worker subprocess that intentionally has no Airflow-Core import. +- They need **per-instance** runtime configuration (interpreter path, JVM flags, …). `provider.yaml` registers classes, not instances, and bolting kwargs onto provider entries would distort the provider data model. 
+- A coordinator is the only thing in this distribution; there is no benefit to sharing the provider's discoverability surface (registry listings, `airflow providers list`, etc.). On the contrary, listing `apache-airflow-providers-sdk-java` next to AWS/GCP providers is misleading for users. + +Putting the registry in `airflow.cfg` keeps the data model honest (instances, with their kwargs) and makes the per-host opt-in (install + config-edit) explicit rather than implicit (install-implies-active). ## Consequences -- The Java SDK ships under provider naming/layout in 3.3-preview; if the project later picks A2/B2/B3/C2, the rename becomes a single-PR refactor with deprecation shims (or a hard rename if the SDK is still in preview and we can break unreleased import paths). -- The Go-SDK migration to the coordinator interface (planned for 3.3) is the natural forcing function for a final decision: a second language SDK lands, and the cost of disagreement compounds. -- `ProvidersManager` accumulates an extra extension point (`coordinators`) that is not really an Airflow "provider" responsibility. This is acceptable as a transitional state but is the strongest argument for option C2 over time. -- Documentation must be explicit that "Java appears in the provider list, Go does not" is a transitional quirk, not a stable property. Whichever naming option is picked, release notes for 3.3 should call out the answer for both languages together. +- **`apache-airflow-coordinators-java`** ships as a new distribution type with its own release docs and constraints handling, distinct from providers. +- **`airflow.sdk.coordinators`** is a namespace package owned by the Task SDK; concrete coordinator distributions contribute subpackages to it. Multiple coordinator distributions can be installed side by side without colliding. +- **`[sdk] coordinators`** carries instance-level configuration; **`[sdk] queue_to_coordinator`** carries queue → instance routing. 
`[sdk] queue_to_sdk` is removed. +- Operators can register multiple instances of the same coordinator class (e.g., `jdk-11` and `jdk-17`) and bind different queues to them — solving the multi-JDK and JVM-flag use cases raised in [apache/airflow#66451](https://github.com/apache/airflow/issues/66451) without subclassing. +- The provider registry no longer shows coordinators, removing the "Java appears, Go does not" asymmetry that earlier drafts of this ADR flagged as a transitional UX wart. +- Future static-source DAG parsers (e.g., YAML / `dag-factory`) that fit the same coordinator shape can use the same `[sdk] coordinators` registry without inventing a new extension point. From fdb38caee184bc4c0cc9be4a9b396cf8f06613cc Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Wed, 6 May 2026 20:29:25 +0800 Subject: [PATCH 15/16] Pass runtime context to Task::execute This allows the task to access runtime information similar to Python's template context. We should be more stringent about what we put in this (things like conn and var should use the client instead), but these probably must go in. This should be done before the initial release of the SDK to avoid backward compatibility issues.
--- .../airflow/example/JavaExampleBuilder.java | 7 +-- .../kotlin/org/apache/airflow/sdk/Context.kt | 48 +++++++++++++++++++ .../kotlin/org/apache/airflow/sdk/Task.kt | 5 +- .../airflow/sdk/execution/TaskRunner.kt | 3 +- .../airflow/sdk/execution/TaskRunnerTest.kt | 11 ++++- 5 files changed, 67 insertions(+), 7 deletions(-) create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Context.kt diff --git a/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java b/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java index e6ec386312088..73edb16f683c5 100644 --- a/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java +++ b/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java @@ -22,6 +22,7 @@ import java.util.Date; import java.util.List; import org.apache.airflow.sdk.*; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,7 +30,7 @@ public class JavaExampleBuilder { private static final Logger logger = LoggerFactory.getLogger(JavaExampleBuilder.class); public static class Extract implements Task { - public void execute(Client client) throws Exception { + public void execute(@NotNull Context context, Client client) throws Exception { logger.info("Hello from task"); var python_xcom = client.getXCom("python_task_1"); @@ -49,7 +50,7 @@ public void execute(Client client) throws Exception { } public static class Transform implements Task { - public void execute(Client client) { + public void execute(@NotNull Context context, Client client) { var extract_xcom = client.getXCom("extract"); logger.info("Got XCom from 'extract' {}", extract_xcom); @@ -62,7 +63,7 @@ public void execute(Client client) { } public static class Load implements Task { - public void execute(Client client) { + public void execute(@NotNull Context context, Client client) { var xcom = client.getXCom("transform"); logger.info("Got XCom from 
'transform' {}", xcom); throw new RuntimeException("I failed"); diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Context.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Context.kt new file mode 100644 index 0000000000000..0ab48f09b86c2 --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Context.kt @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.airflow.sdk + +import org.apache.airflow.sdk.execution.StartupDetails + +data class DagRun( + @JvmField val dagId: String, + @JvmField val runId: String, +) + +data class TaskInstance( + @JvmField val dagId: String, + @JvmField val runId: String, + @JvmField val taskId: String, + @JvmField val mapIndex: Int?, + @JvmField val tryNumber: Int, +) + +data class Context( + @JvmField val dagRun: DagRun, + @JvmField val ti: TaskInstance, +) { + internal companion object { + fun from(request: StartupDetails): Context = + Context( + dagRun = with(request.tiContext.dagRun) { DagRun(dagId, runId) }, + ti = with(request.ti) { TaskInstance(dagId, runId, taskId, mapIndex, tryNumber) }, + ) + } +} diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt index e65523e2c1dc6..b24ad75668eab 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Task.kt @@ -23,5 +23,8 @@ import kotlin.Throws interface Task { @Throws(Exception::class) - fun execute(client: Client) + fun execute( + context: Context, + client: Client, + ) } diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt index 8223276e1fb5d..c88620540072a 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/execution/TaskRunner.kt @@ -21,6 +21,7 @@ package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.Bundle import org.apache.airflow.sdk.Client +import org.apache.airflow.sdk.Context object TaskRunner { fun run( @@ -37,7 +38,7 @@ object TaskRunner { val task = bundle.dags[request.ti.dagId]?.tasks[request.ti.taskId] ?: return TaskState("removed") val instance = task.getDeclaredConstructor().newInstance() return try { - 
instance.execute(client) + instance.execute(Context.from(request), client) SucceedTask() } catch (e: Exception) { e.printStackTrace() diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt index 024d2d132f97d..0f2bb53bc8d78 100644 --- a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/execution/TaskRunnerTest.kt @@ -21,6 +21,7 @@ package org.apache.airflow.sdk.execution import org.apache.airflow.sdk.Bundle import org.apache.airflow.sdk.Client +import org.apache.airflow.sdk.Context import org.apache.airflow.sdk.Dag import org.apache.airflow.sdk.Task import org.apache.airflow.sdk.execution.api.model.BundleInfo @@ -119,11 +120,17 @@ class TaskRunnerTest { ) class SuccessTask : Task { - override fun execute(client: Client) { + override fun execute( + context: Context, + client: Client, + ) { } } class FailingTask : Task { - override fun execute(client: Client): Unit = throw IllegalStateException("boom") + override fun execute( + context: Context, + client: Client, + ): Unit = throw IllegalStateException("boom") } } From ae91075cf30d038a7361ff641337bbe091a37ef8 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Thu, 7 May 2026 21:33:10 +0800 Subject: [PATCH 16/16] Introduce Builder annotations (#1576) * Introduce DagBuilder annotations This provides two annotations: DagBuilder and DagBuilder.Task that *slightly* reduces the boilerplate needed to define tasks in Java (about two lines per task, and two more lines for the dag). The way this works is you do @DagBuilder public class MyDag { @DagBuilder.Task public void myTask(...) { ... } } and the compiler uses our annotation processor to generate a wrapper class named MyDagBuilder with the needed tasks and dependencies defined. 
You then can register the dag to the bundle via the builder by calling MyDagBuilder.build() in the BundleBuilder's getDags(). A lot of code for a little benefit for the moment, but this should make taskflow-style XCom a lot easier. (This is not implemented yet.) * Push XCom from return value * Support auto XCom set and get Generate extra code around the annotated function to pass in XCom references, and set XCom from the return value. * Move builder annotations to 'Builder' * Allow customizing generated builder class name * Add tests for annotation processor --- java-sdk/example/build.gradle.kts | 2 + .../airflow/example/AnnotationExample.java | 67 +++++ .../airflow/example/ExampleBundleBuilder.java | 2 +- ...lder.java => InterfaceExampleBuilder.java} | 16 +- java-sdk/sdk/build.gradle.kts | 3 + .../kotlin/org/apache/airflow/sdk/Builder.kt | 243 ++++++++++++++++++ .../main/kotlin/org/apache/airflow/sdk/Dag.kt | 8 +- .../javax.annotation.processing.Processor | 1 + .../org/apache/airflow/sdk/BuilderTest.kt | 239 +++++++++++++++++ 9 files changed, 571 insertions(+), 10 deletions(-) create mode 100644 java-sdk/example/src/java/org/apache/airflow/example/AnnotationExample.java rename java-sdk/example/src/java/org/apache/airflow/example/{JavaExampleBuilder.java => InterfaceExampleBuilder.java} (84%) create mode 100644 java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Builder.kt create mode 100644 java-sdk/sdk/src/main/resources/META-INF/services/javax.annotation.processing.Processor create mode 100644 java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BuilderTest.kt diff --git a/java-sdk/example/build.gradle.kts b/java-sdk/example/build.gradle.kts index 674bde14cd0da..d1565a53bae58 100644 --- a/java-sdk/example/build.gradle.kts +++ b/java-sdk/example/build.gradle.kts @@ -22,6 +22,7 @@ plugins { } dependencies { + annotationProcessor(project(":sdk")) implementation(project(":sdk")) implementation("org.slf4j:slf4j-simple:2.0.17") } @@ -44,6 +45,7 @@ val dagCodeFileName = 
bundleMainClass.substringAfterLast('.') + ".java" val inspectBundle = tasks.register("inspectBundle") { + description = "Collect Dag structures by inspecting the Dag bundle" dependsOn("classes") classpath = sourceSets.main.get().runtimeClasspath mainClass.set("org.apache.airflow.sdk.BundleInspector") diff --git a/java-sdk/example/src/java/org/apache/airflow/example/AnnotationExample.java b/java-sdk/example/src/java/org/apache/airflow/example/AnnotationExample.java new file mode 100644 index 0000000000000..4068f36ef0253 --- /dev/null +++ b/java-sdk/example/src/java/org/apache/airflow/example/AnnotationExample.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.airflow.example; + +import org.apache.airflow.sdk.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; + +@Builder.Dag(id = "java_annotation_example") +public class AnnotationExample { + private static final Logger logger = LoggerFactory.getLogger(AnnotationExample.class); + + @Builder.Task(id = "extract") + public long extractValue(Client client) throws InterruptedException { + logger.info("Hello from task"); + + var pythonXcom = client.getXCom("python_task_1"); + logger.info("Got XCom from Python Task 'python_task_1' {}", pythonXcom); + + var connection = client.getConnection("test_http"); + logger.info("Got con {}", connection); + + for (var i = 0; i < 3; i++) { + logger.info("Beep {}, next time will be {}", i, new Date()); + Thread.sleep(2 * 1000); + } + + logger.info("Goodbye from task"); + return new Date().getTime(); + } + + @Builder.Task(id = "transform", depends = {"extract"}) + public long transformValue(Client client, @Builder.XCom(task = "extract") long extracted) { + logger.info("Got XCom from 'extract' {}", extracted); + + var variable = client.getVariable("my_variable"); + logger.info("Got variable {}", variable); + + logger.info("Push XCom to python task 2"); + return new Date().getTime(); + } + + @Builder.Task(depends = {"transform"}) + public void load(@Builder.XCom(task = "transform") long transformed) { + logger.info("Got XCom from 'transform' {}", transformed); + throw new RuntimeException("I failed"); + } +} diff --git a/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java b/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java index 1368d891e97ea..0aa729d00030e 100644 --- a/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java +++ b/java-sdk/example/src/java/org/apache/airflow/example/ExampleBundleBuilder.java @@ -27,7 +27,7 @@ public class ExampleBundleBuilder implements BundleBuilder { @NotNull @Override 
public Iterable getDags() { - return List.of(JavaExampleBuilder.build()); + return List.of(InterfaceExampleBuilder.build(), AnnotationExampleBuilder.build()); } public static void main(String[] args) { diff --git a/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java b/java-sdk/example/src/java/org/apache/airflow/example/InterfaceExampleBuilder.java similarity index 84% rename from java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java rename to java-sdk/example/src/java/org/apache/airflow/example/InterfaceExampleBuilder.java index 73edb16f683c5..2599d90cc4317 100644 --- a/java-sdk/example/src/java/org/apache/airflow/example/JavaExampleBuilder.java +++ b/java-sdk/example/src/java/org/apache/airflow/example/InterfaceExampleBuilder.java @@ -26,15 +26,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class JavaExampleBuilder { - private static final Logger logger = LoggerFactory.getLogger(JavaExampleBuilder.class); +public class InterfaceExampleBuilder { + private static final Logger logger = LoggerFactory.getLogger(InterfaceExampleBuilder.class); public static class Extract implements Task { public void execute(@NotNull Context context, Client client) throws Exception { logger.info("Hello from task"); - var python_xcom = client.getXCom("python_task_1"); - logger.info("Got XCom from Python Task 'python_task_1' {}", python_xcom); + var pythonInput = client.getXCom("python_task_1"); + logger.info("Got XCom from Python Task 'python_task_1' {}", pythonInput); var connection = client.getConnection("test_http"); logger.info("Got con {}", connection); @@ -51,8 +51,8 @@ public void execute(@NotNull Context context, Client client) throws Exception { public static class Transform implements Task { public void execute(@NotNull Context context, Client client) { - var extract_xcom = client.getXCom("extract"); - logger.info("Got XCom from 'extract' {}", extract_xcom); + var extracted = client.getXCom("extract"); + 
logger.info("Got XCom from 'extract' {}", extracted); var variable = client.getVariable("my_variable"); logger.info("Got variable {}", variable); @@ -64,8 +64,8 @@ public void execute(@NotNull Context context, Client client) { public static class Load implements Task { public void execute(@NotNull Context context, Client client) { - var xcom = client.getXCom("transform"); - logger.info("Got XCom from 'transform' {}", xcom); + var transformed = client.getXCom("transform"); + logger.info("Got XCom from 'transform' {}", transformed); throw new RuntimeException("I failed"); } } diff --git a/java-sdk/sdk/build.gradle.kts b/java-sdk/sdk/build.gradle.kts index 3e41ea0464109..b494a72ce1b22 100644 --- a/java-sdk/sdk/build.gradle.kts +++ b/java-sdk/sdk/build.gradle.kts @@ -46,6 +46,7 @@ dependencies { implementation("com.fasterxml.jackson.core:jackson-databind:2.21.0") implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.21.0") implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.21.0") + implementation("com.squareup:javapoet:1.13.0") implementation("com.squareup.retrofit2:converter-jackson:3.0.0") implementation("com.squareup.retrofit2:converter-scalars:3.0.0") implementation("com.squareup.retrofit2:retrofit:3.0.0") @@ -59,6 +60,7 @@ dependencies { implementation("org.msgpack:jackson-dataformat-msgpack:0.9.11") testImplementation(kotlin("test")) + testImplementation("com.google.testing.compile:compile-testing:0.23.0") testImplementation("com.squareup.okhttp3:mockwebserver:4.12.0") } @@ -122,6 +124,7 @@ abstract class GenerateConstantsTask : DefaultTask() { } tasks.register("generateConstants") { + description = "Generate constants to use in code from build configurations." 
airflowExecApiVersionProp = airflowExecApiVersion outputDirProp = constantsDir } diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Builder.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Builder.kt new file mode 100644 index 0000000000000..ff278e9ff569c --- /dev/null +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Builder.kt @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +@file:Suppress("PLATFORM_CLASS_MAPPED_TO_KOTLIN") + +package org.apache.airflow.sdk + +import com.squareup.javapoet.ClassName +import com.squareup.javapoet.JavaFile +import com.squareup.javapoet.MethodSpec +import com.squareup.javapoet.TypeName +import com.squareup.javapoet.TypeSpec +import javax.annotation.processing.AbstractProcessor +import javax.annotation.processing.ProcessingEnvironment +import javax.annotation.processing.RoundEnvironment +import javax.annotation.processing.SupportedAnnotationTypes +import javax.annotation.processing.SupportedSourceVersion +import javax.lang.model.SourceVersion +import javax.lang.model.element.ExecutableElement +import javax.lang.model.element.Modifier +import javax.lang.model.element.TypeElement +import javax.lang.model.type.TypeKind +import javax.lang.model.type.TypeMirror +import javax.tools.Diagnostic + +class Builder internal constructor() { + /** + * Annotation to automate a Dag-builder pattern. + * + * When applied on a class Foo, this generates a FooBuilder class with a static build method + * to create the Dag structure automatically. + * + * @param id Override the Dag ID. If empty or not provided, the annotated class's name is used by default. + * @param to Name of the Dag-builder class. If empty or not provided, use the annotated class name + "Builder". + */ + @Target(AnnotationTarget.CLASS) + @MustBeDocumented + annotation class Dag( + val id: String = "", + val to: String = "", + ) + + /** + * Annotation to automate task definition in a Dag-builder pattern. + * + * @param id Override the task ID. If empty or not provided, the annotated function's name is used by default. + * @param depends List of task IDs this task depends on. + */ + @Target(AnnotationTarget.FUNCTION) + @MustBeDocumented + annotation class Task( + val id: String = "", + val depends: Array = [], + ) + + /** + * Annotation to mark a task definition's method parameter as an XCom input. + * + * @param task The task ID to pull. 
If empty or not given, the annotated parameter's name is used by default. + * @param key The XCom key to pull. Defaults to the task's return value. + */ + @Target(AnnotationTarget.VALUE_PARAMETER) + @MustBeDocumented + annotation class XCom( + val task: String = "", + val key: String = Client.XCOM_RETURN_KEY, + ) +} + +@SupportedAnnotationTypes("org.apache.airflow.sdk.Builder.Dag") +@SupportedSourceVersion(SourceVersion.RELEASE_11) +class BuilderProcessor : AbstractProcessor() { + override fun process( + annotations: Set, + roundEnv: RoundEnvironment, + ): Boolean { + if (annotations.isEmpty()) return false + roundEnv.getElementsAnnotatedWith(Builder.Dag::class.java).filterIsInstance().forEach { el -> + with(processingEnv) { + runCatching { + JavaFile + .builder( + elementUtils.getPackageOf(el).qualifiedName.toString(), + buildDag(el), + ).build() + .writeTo(filer) + }.onFailure { e -> + messager.printMessage( + Diagnostic.Kind.ERROR, + e.message ?: "Unknown error", + el, + ) + } + } + } + return true + } + + private fun buildDag(el: TypeElement): TypeSpec { + val ann = el.getAnnotation(Builder.Dag::class.java)!! 
+ + val builderClass = + TypeSpec + .classBuilder(ann.to.ifBlank { "${el.simpleName}Builder" }) + .addModifiers(Modifier.PUBLIC, Modifier.FINAL) + + val buildMethod = + MethodSpec + .methodBuilder("build") + .addModifiers(Modifier.PUBLIC, Modifier.STATIC) + .returns(ClassName.get(Dag::class.java)) + .addStatement($$"var dag = new $T($S)", ClassName.get(Dag::class.java), ann.id.ifBlank { el.simpleName }) + + for (inner in el.enclosedElements) { + if (inner !is ExecutableElement) continue + if (inner.isVarArgs) throw IllegalArgumentException("Cannot create task from vararg function ${inner.simpleName}") + + val ann = inner.getAnnotation(Builder.Task::class.java) ?: continue + val innerName = inner.simpleName.toString().replaceFirstChar(Char::uppercase) + + val task = buildTask(innerName, inner, el) + builderClass.addType(task.spec) + + val depends = + task.required + .map { it.taskId } + .plus(ann.depends) + .toTypedArray() + buildMethod.addStatement( + if (depends.isEmpty()) { + $$"dag.addTask($S, $L.class)" + } else { + $$"dag.addTask($S, $L.class, new String[]{$${depends.joinToString { $$"$S" }}})" + }, + ann.id.ifBlank { inner.simpleName }, + innerName, + *depends, + ) + } + + buildMethod.addStatement("return dag") + builderClass.addMethod(buildMethod.build()) + return builderClass.build() + } + + private fun buildTask( + name: String, + inner: ExecutableElement, + parent: TypeElement, + ): BuildTaskResult { + val clientType = ClassName.get(Client::class.java) + val contextType = ClassName.get(Context::class.java) + + val executeSpec = + MethodSpec + .methodBuilder("execute") + .addAnnotation(Override::class.java) + .addModifiers(Modifier.PUBLIC) + .returns(TypeName.VOID) + .addParameter(contextType, "context") + .addParameter(clientType, "client") + .addException(Exception::class.java) + + val required = mutableListOf() + val innerArgs = + with(processingEnv) { + inner.parameters.joinToString { param -> + val anno = param.getAnnotation(Builder.XCom::class.java) 
+ val type = param.asType() + when { + anno != null -> + param.simpleName.toString().also { + required += RequiredXCom(type, it, anno.task.ifBlank { it }) + } + isType(type, clientType) -> "client" + isType(type, contextType) -> "context" + else -> throw IllegalArgumentException("Unsupported task parameter '${param.simpleName}' with type: $type") + } + } + } + required.forEach { + executeSpec.addStatement( + $$"var $L = ($T) client.getXCom($S)", + it.paramName, + with(TypeName.get(it.paramType)) { if (isPrimitive) box() else this }, + it.taskId, + ) + } + if (inner.returnType.kind == TypeKind.VOID) { + $$"new $T().$L($L)" + } else { + $$"client.setXCom(new $T().$L($L))" + }.also { + executeSpec.addStatement( + it, + ClassName.get(parent), + inner.simpleName, + innerArgs, + ) + } + + val spec = + TypeSpec + .classBuilder(name) + .addSuperinterface(Task::class.java) + .addModifiers(Modifier.PUBLIC, Modifier.FINAL, Modifier.STATIC) + .addMethod(executeSpec.build()) + .build() + return BuildTaskResult(spec, required) + } +} + +private fun ProcessingEnvironment.isType( + t: TypeMirror, + c: ClassName, +): Boolean = typeUtils.isSameType(t, elementUtils.getTypeElement(c.canonicalName()).asType()) + +private data class RequiredXCom( + val paramType: TypeMirror, + val paramName: String, + val taskId: String, +) + +private data class BuildTaskResult( + val spec: TypeSpec, + val required: List, +) diff --git a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt index 17fd498168791..dd1c878f13281 100644 --- a/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt +++ b/java-sdk/sdk/src/main/kotlin/org/apache/airflow/sdk/Dag.kt @@ -36,7 +36,7 @@ class Dag( fun addTask( id: String, definition: Class, - dependsOn: List = emptyList(), + dependsOn: Iterable = emptyList(), ) { // TODO: Check duplicate key. 
tasks[id] = definition @@ -44,4 +44,10 @@ class Dag( dependants.getOrPut(parent) { mutableSetOf() }.add(id) } } + + fun addTask( + id: String, + definition: Class, + dependsOn: Array, + ) = addTask(id, definition, dependsOn.toSet()) } diff --git a/java-sdk/sdk/src/main/resources/META-INF/services/javax.annotation.processing.Processor b/java-sdk/sdk/src/main/resources/META-INF/services/javax.annotation.processing.Processor new file mode 100644 index 0000000000000..f9d6d12ad0cd7 --- /dev/null +++ b/java-sdk/sdk/src/main/resources/META-INF/services/javax.annotation.processing.Processor @@ -0,0 +1 @@ +org.apache.airflow.sdk.BuilderProcessor diff --git a/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BuilderTest.kt b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BuilderTest.kt new file mode 100644 index 0000000000000..681ba7d2eabf4 --- /dev/null +++ b/java-sdk/sdk/src/test/kotlin/org/apache/airflow/sdk/BuilderTest.kt @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.airflow.sdk + +import com.google.testing.compile.CompilationSubject.assertThat +import com.google.testing.compile.Compiler +import com.google.testing.compile.JavaFileObjectSubject +import com.google.testing.compile.JavaFileObjects +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test + +private fun compile(source: String) = + Compiler.javac().withProcessors(BuilderProcessor()).compile( + JavaFileObjects.forSourceString("org.apache.airflow.example.TestExample", source), + ) + +private fun JavaFileObjectSubject.hasSourceEquivalentTo( + qual: String, + source: String, +) = hasSourceEquivalentTo( + JavaFileObjects.forSourceString(qual, source), +) + +class BuilderTest { + @Test + @DisplayName("generate builder for dag class") + fun generateBuilderForDagClass() { + val compilation = + compile( + """ + package org.apache.airflow.example; + + import org.apache.airflow.sdk.Builder; + import org.apache.airflow.sdk.Client; + import org.apache.airflow.sdk.Context; + + @Builder.Dag + public class TestExample { + @Builder.Task + public void t1() {} + + @Builder.Task(depends = {"t1"}) + public int t2(Client client) { + return (Integer) client.getXCom("t0"); + } + + @Builder.Task(depends = {"t1", "t2"}) + public void t3(Context ctx, @Builder.XCom(task = "t2") int value) { + System.out.println(String.format("%s %s", ctx.ti, value)); + } + } + """, + ) + + assertThat(compilation) + .generatedSourceFile("org.apache.airflow.example.TestExampleBuilder") + .hasSourceEquivalentTo( + "org.apache.airflow.example.TestExampleBuilder", + """ + package org.apache.airflow.example; + + import java.lang.Exception; + import java.lang.Integer; + import java.lang.Override; + import org.apache.airflow.sdk.Client; + import org.apache.airflow.sdk.Context; + import org.apache.airflow.sdk.Dag; + import org.apache.airflow.sdk.Task; + + public final class TestExampleBuilder { + public static Dag build() { + var dag = new Dag("TestExample"); + 
dag.addTask("t1", T1.class); + dag.addTask("t2", T2.class, new String[]{"t1"}); + dag.addTask("t3", T3.class, new String[]{"t2", "t1", "t2"}); + return dag; + } + public static final class T1 implements Task { + @Override + public void execute(Context context, Client client) throws Exception { + new TestExample().t1(); + } + } + public static final class T2 implements Task { + @Override + public void execute(Context context, Client client) throws Exception { + client.setXCom(new TestExample().t2(client)); + } + } + public static final class T3 implements Task { + @Override + public void execute(Context context, Client client) throws Exception { + var value = (Integer) client.getXCom("t2"); + new TestExample().t3(context, value); + } + } + } + """, + ) + } + + /** Compiles a class annotated with `@Builder.Dag(id = "foo")` that declares no task methods and expects the generated `TestExampleBuilder` source to construct `new Dag("foo")`: the annotation's `id` is used as the dag id while the generated class is still named `TestExampleBuilder`. */ + @Test + @DisplayName("generate builder for dag class with custom dag id") + fun generateBuilderWithCustomDagId() { + val compilation = + compile( + """ + package org.apache.airflow.example; + import org.apache.airflow.sdk.Builder; + @Builder.Dag(id = "foo") public class TestExample {} + """, + ) + assertThat(compilation) + .generatedSourceFile("org.apache.airflow.example.TestExampleBuilder") + .hasSourceEquivalentTo( + "org.apache.airflow.example.TestExampleBuilder", + """ + package org.apache.airflow.example; + import org.apache.airflow.sdk.Dag; + public final class TestExampleBuilder { public static Dag build() { var dag = new Dag("foo"); return dag; } } + """, + ) + } + + /** Compiles a class annotated with `@Builder.Dag(to = "Foo")` and expects the generated source file to be `org.apache.airflow.example.Foo` with a class named `Foo`, while the dag id stays `TestExample` (the simple name of the annotated class). */ + @Test + @DisplayName("generate builder for dag class with custom class name") + fun generateBuilderWithCustomClassName() { + val compilation = + compile( + """ + package org.apache.airflow.example; + import org.apache.airflow.sdk.Builder; + @Builder.Dag(to = "Foo") public class TestExample {} + """, + ) + assertThat(compilation) + .generatedSourceFile("org.apache.airflow.example.Foo") + .hasSourceEquivalentTo( + "org.apache.airflow.example.Foo", + """ + package org.apache.airflow.example; + import org.apache.airflow.sdk.Dag; + public final 
class Foo { public static Dag build() { var dag = new Dag("TestExample"); return dag; } } + """, + ) + } + + /** Compiles a dag class whose method `t1` carries `@Builder.Task(id = "foo")` and expects the generated builder to register the task as `dag.addTask("foo", T1.class)`: the task id comes from the annotation, while the nested task class is still named `T1` and its `execute` calls `new TestExample().t1()`. */ + @Test + @DisplayName("generate builder for dag class with custom task name") + fun generateBuilderForDagClassWithCustomTaskName() { + val compilation = + compile( + """ + package org.apache.airflow.example; + import org.apache.airflow.sdk.Builder; + @Builder.Dag + public class TestExample { @Builder.Task(id = "foo") public void t1() {} } + """, + ) + + assertThat(compilation) + .generatedSourceFile("org.apache.airflow.example.TestExampleBuilder") + .hasSourceEquivalentTo( + "org.apache.airflow.example.TestExampleBuilder", + """ + package org.apache.airflow.example; + import java.lang.Exception; + import java.lang.Override; + import org.apache.airflow.sdk.Client; + import org.apache.airflow.sdk.Context; + import org.apache.airflow.sdk.Dag; + import org.apache.airflow.sdk.Task; + public final class TestExampleBuilder { + public static Dag build() { + var dag = new Dag("TestExample"); + dag.addTask("foo", T1.class); + return dag; + } + public static final class T1 implements Task { + @Override public void execute(Context context, Client client) throws Exception { new TestExample().t1(); } + } + } + """, + ) + } + + /** Compiles a dag class whose task method declares a `String` parameter and expects compilation to fail with an error containing "Unsupported task parameter 'client' with type: java.lang.String". */ + @Test + @DisplayName("generate builder for dag class with invalid task parameter") + fun generateBuilderForDagClassWithInvalidTaskParameter() { + val compilation = + compile( + """ + package org.apache.airflow.example; + import org.apache.airflow.sdk.Builder; + @Builder.Dag + public class TestExample { @Builder.Task(id = "foo") public void t1(String client) {} } + """, + ) + assertThat(compilation).failed() + assertThat(compilation).hadErrorContaining( + "Unsupported task parameter 'client' with type: java.lang.String", + ) + } + + /** Compiles a dag class whose task method declares a varargs parameter (`String... client`) and expects compilation to fail with an error containing "Cannot create task from vararg function t1". */ + @Test + @DisplayName("generate builder for dag class with varargs task parameter") + fun generateBuilderForDagClassWithVarArgsTaskParameter() { + val compilation = + compile( + """ + package org.apache.airflow.example; + 
import org.apache.airflow.sdk.Builder; + @Builder.Dag + public class TestExample { @Builder.Task(id = "foo") public void t1(String... client) {} } + """, + ) + assertThat(compilation).failed() + assertThat(compilation).hadErrorContaining( + "Cannot create task from vararg function t1", + ) + } +}