 from dataclasses import dataclass
 from datetime import datetime
 from http import HTTPStatus
+from multiprocessing import Process
 from pathlib import Path
 from subprocess import Popen
 from time import sleep
@@ -82,12 +83,6 @@ def force_use_internal_api_on_edge_worker():
     os.environ["_AIRFLOW__SKIP_DATABASE_EXECUTOR_COMPATIBILITY_CHECK"] = "1"
     os.environ["AIRFLOW_ENABLE_AIP_44"] = "True"
     if "airflow" in sys.argv[0] and sys.argv[1:3] == ["edge", "worker"]:
-        if AIRFLOW_V_3_0_PLUS:
-            # Obvious TODO Make EdgeWorker compatible with Airflow 3 (again)
-            raise SystemExit(
-                "Error: EdgeWorker is currently broken on Airflow 3/main due to removal of AIP-44, rework for AIP-72."
-            )
-
         api_url = conf.get("edge", "api_url")
         if not api_url:
             raise SystemExit("Error: API URL is not configured, please correct configuration.")
@@ -138,11 +133,26 @@ class _Job:
     """Holds all information for a task/job to be executed as bundle."""
 
     edge_job: EdgeJobFetched
-    process: Popen
+    process: Popen | Process
     logfile: Path
     logsize: int
     """Last size of log file, point of last chunk push."""
 
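+    # NOTE: Popen.returncode is only refreshed by poll()/wait(), while a
+    # multiprocessing Process exposes a live exitcode, hence both branches below.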
+    @property
+    def is_running(self) -> bool:
+        """Check if the job is still running."""
+        if isinstance(self.process, Popen):
+            self.process.poll()
+            return self.process.returncode is None
+        return self.process.exitcode is None
+
+    @property
+    def is_success(self) -> bool:
+        """Check if the job was successful."""
+        if isinstance(self.process, Popen):
+            return self.process.returncode == 0
+        return self.process.exitcode == 0
+
 
 class _EdgeWorkerCli:
     """Runner instance which executes the Edge Worker."""
@@ -191,6 +201,73 @@ def _get_sysinfo(self) -> dict:
             "free_concurrency": self.free_concurrency,
         }
 
+    def _launch_job_af3(self, edge_job: EdgeJobFetched) -> tuple[Process, Path]:
+        if TYPE_CHECKING:
+            from airflow.executors.workloads import ExecuteTask
+
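+        # The nested function below runs in the spawned child process and hands
+        # the workload to the Task SDK supervisor, which executes the task.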
+        def _run_job_via_supervisor(
+            workload: ExecuteTask,
+        ) -> int:
+            from setproctitle import setproctitle
+
+            from airflow.sdk.execution_time.supervisor import supervise
+
+            # Ignore ctrl-c in this process -- we don't want to kill _this_ one. We let tasks run to completion.
+            signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+            logger.info("Worker starting up pid=%d", os.getpid())
+            setproctitle(f"airflow edge worker: {workload.ti.key}")
+
+            try:
+                supervise(
+                    # This is the "wrong" ti type, but it duck types the same. TODO: Create a protocol for this.
+                    # Same as in airflow/executors/local_executor.py:_execute_work()
+                    ti=workload.ti,  # type: ignore[arg-type]
+                    dag_path=workload.dag_path,
+                    token=workload.token,
+                    server=conf.get(
+                        "workers", "execution_api_server_url", fallback="http://localhost:9091/execution/"
+                    ),
+                    log_path=workload.log_path,
+                )
+                return 0
+            except Exception as e:
+                logger.exception("Task execution failed: %s", e)
+                return 1
+
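+        # Back in the worker process: on Airflow 3 the fetched job carries an
+        # ExecuteTask workload rather than a CLI command.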
+        workload: ExecuteTask = edge_job.command
+        process = Process(
+            target=_run_job_via_supervisor,
+            kwargs={"workload": workload},
+        )
+        process.start()
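+        # Resolve the log file the supervisor writes to, so the worker can push
+        # log chunks from it (see _Job.logsize).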
+        base_log_folder = conf.get("logging", "base_log_folder", fallback="NOT AVAILABLE")
+        if TYPE_CHECKING:
+            assert workload.log_path  # We need to assume this is defined in here
+        logfile = Path(base_log_folder, workload.log_path)
+        return process, logfile
+
+    def _launch_job_af2_10(self, edge_job: EdgeJobFetched) -> tuple[Popen, Path]:
+        """Compatibility for Airflow 2.10 Launch."""
+        env = os.environ.copy()
+        env["AIRFLOW__CORE__DATABASE_ACCESS_ISOLATION"] = "True"
+        env["AIRFLOW__CORE__INTERNAL_API_URL"] = conf.get("edge", "api_url")
+        env["_AIRFLOW__SKIP_DATABASE_EXECUTOR_COMPATIBILITY_CHECK"] = "1"
+ command : list [str ] = edge_job .command # type: ignore[assignment]
257
+ process = Popen (command , close_fds = True , env = env , start_new_session = True )
258
+ logfile = logs_logfile_path (edge_job .key )
259
+ return process , logfile
260
+
261
+ def _launch_job (self , edge_job : EdgeJobFetched ):
262
+ """Get the received job executed."""
263
+ process : Popen | Process
264
+ if AIRFLOW_V_3_0_PLUS :
265
+ process , logfile = self ._launch_job_af3 (edge_job )
266
+ else :
267
+ # Airflow 2.10
268
+ process , logfile = self ._launch_job_af2_10 (edge_job )
269
+ self .jobs .append (_Job (edge_job , process , logfile , 0 ))
270
+
194
271
def start (self ):
195
272
"""Start the execution in a loop until terminated."""
196
273
try :
@@ -239,13 +316,7 @@ def fetch_job(self) -> bool:
         edge_job = jobs_fetch(self.hostname, self.queues, self.free_concurrency)
         if edge_job:
             logger.info("Received job: %s", edge_job)
-            env = os.environ.copy()
-            env["AIRFLOW__CORE__DATABASE_ACCESS_ISOLATION"] = "True"
-            env["AIRFLOW__CORE__INTERNAL_API_URL"] = conf.get("edge", "api_url")
-            env["_AIRFLOW__SKIP_DATABASE_EXECUTOR_COMPATIBILITY_CHECK"] = "1"
-            process = Popen(edge_job.command, close_fds=True, env=env, start_new_session=True)
-            logfile = logs_logfile_path(edge_job.key)
-            self.jobs.append(_Job(edge_job, process, logfile, 0))
+            self._launch_job(edge_job)
             jobs_set_state(edge_job.key, TaskInstanceState.RUNNING)
         return True
 
@@ -257,10 +328,9 @@ def check_running_jobs(self) -> None:
         used_concurrency = 0
         for i in range(len(self.jobs) - 1, -1, -1):
             job = self.jobs[i]
-            job.process.poll()
-            if job.process.returncode is not None:
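+            # _Job.is_running/is_success hide the Popen vs Process differences.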
+            if not job.is_running:
                 self.jobs.remove(job)
-                if job.process.returncode == 0:
+                if job.is_success:
                     logger.info("Job completed: %s", job.edge_job)
                     jobs_set_state(job.edge_job.key, TaskInstanceState.SUCCESS)
                 else: