-
Notifications
You must be signed in to change notification settings - Fork 14.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update conf column in dag_run table type from bytes to JSON (#44533)
* remove pickled data from dag run table * fix downgrade + add news fragment * remove archive table if exists after downgrade * removing archiving data * fixing static check * fixing static checks * simplifying upgrade and downgrade as per review * fixing failures * removing setting conf to null * refactor approach to migrate values in conf * update offline warning * resolving conflicts * resolving conflicts * resolving conflicts * updating batch size * updating conf type --------- Co-authored-by: Jed Cunningham <[email protected]>
- Loading branch information
1 parent
e229ca0
commit db132fb
Showing
7 changed files
with
159 additions
and
6 deletions.
There are no files selected for viewing
145 changes: 145 additions & 0 deletions
145
airflow/migrations/versions/0055_3_0_0_remove_pickled_data_from_dagrun_table.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
""" | ||
remove pickled data from dagrun table. | ||
Revision ID: e39a26ac59f6 | ||
Revises: 38770795785f | ||
Create Date: 2024-12-01 08:33:15.425141 | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
import json | ||
import pickle | ||
from textwrap import dedent | ||
|
||
import sqlalchemy as sa | ||
from alembic import context, op | ||
from sqlalchemy import text | ||
from sqlalchemy.dialects import postgresql | ||
|
||
# Revision identifiers, used by Alembic. | ||
# `revision` is this migration's ID and `down_revision` is the migration it follows; | ||
# both match the "Revision ID" / "Revises" lines in the module docstring. | ||
revision = "e39a26ac59f6" | ||
down_revision = "38770795785f" | ||
branch_labels = None | ||
depends_on = None | ||
# NOTE(review): presumably the Airflow release that ships this migration (the file | ||
# name also carries 3_0_0) -- confirm against the migration tooling. | ||
airflow_version = "3.0.0" | ||
|
||
|
||
def upgrade():
    """
    Apply remove pickled data from dagrun table.

    Adds a JSON ``conf_json`` column (JSONB on PostgreSQL) to ``dag_run``,
    copies every non-NULL pickled ``conf`` value into it as JSON, then drops
    the pickled column and renames ``conf_json`` to ``conf``.

    In offline mode no data can be read, so only the schema changes are
    emitted and a warning is printed: existing ``conf`` values are lost.

    Rows whose ``conf`` cannot be unpickled or is not JSON-serializable are
    reported on stdout and left as NULL in the new column. Database errors
    are *not* swallowed; they propagate and abort the migration.
    """
    conn = op.get_bind()
    conf_type = sa.JSON().with_variant(postgresql.JSONB, "postgresql")
    op.add_column("dag_run", sa.Column("conf_json", conf_type, nullable=True))

    if context.is_offline_mode():
        print(
            dedent("""
            ------------
            -- WARNING: Unable to migrate the data in the 'conf' column while in offline mode!
            -- The 'conf' column will be set to NULL in offline mode.
            -- Avoid using offline mode if you need to retain 'conf' values.
            ------------
            """)
        )
    else:
        BATCH_SIZE = 100
        # Keyset pagination: each batch resumes after the last id already seen
        # instead of using OFFSET, which makes the database re-scan and discard
        # every previously processed row on each iteration.
        first_page = text(
            f"SELECT id, conf FROM dag_run WHERE conf IS NOT NULL ORDER BY id LIMIT {BATCH_SIZE}"
        )
        next_page = text(
            "SELECT id, conf FROM dag_run "
            "WHERE conf IS NOT NULL AND id > :last_id "
            f"ORDER BY id LIMIT {BATCH_SIZE}"
        )
        update_stmt = text("UPDATE dag_run SET conf_json = :json_data WHERE id = :id")

        last_id = None
        while True:
            if last_id is None:
                rows = conn.execute(first_page).fetchall()
            else:
                rows = conn.execute(next_page, {"last_id": last_id}).fetchall()
            if not rows:
                break

            for row_id, pickle_data in rows:
                # Best-effort conversion: only the (un)serialization is guarded,
                # so a bad value skips that one row without hiding DB failures.
                # NOTE: pickle.loads executes code embedded in the payload; this
                # is only acceptable because the data comes from Airflow's own
                # metadata database.
                try:
                    json_data = json.dumps(pickle.loads(pickle_data))
                except Exception as e:
                    print(f"Error converting dagrun conf to json for dagrun ID {row_id}: {e}")
                    continue
                conn.execute(update_stmt, {"json_data": json_data, "id": row_id})

            last_id = rows[-1][0]

    op.drop_column("dag_run", "conf")

    op.alter_column("dag_run", "conf_json", existing_type=conf_type, new_column_name="conf")
|
||
|
||
def downgrade():
    """
    Unapply Remove pickled data from dagrun table.

    Adds a pickled ``conf_pickle`` column to ``dag_run``, copies every
    non-NULL JSON ``conf`` value into it as a pickle, then drops the JSON
    column and renames ``conf_pickle`` to ``conf``.

    In offline mode no data can be read, so only the schema changes are
    emitted and a warning is printed: existing ``conf`` values are lost.

    Rows whose ``conf`` cannot be decoded or pickled are reported on stdout
    and left as NULL in the new column. Database errors are *not* swallowed;
    they propagate and abort the migration.
    """
    conn = op.get_bind()
    op.add_column("dag_run", sa.Column("conf_pickle", sa.PickleType(), nullable=True))

    if context.is_offline_mode():
        print(
            dedent("""
            ------------
            -- WARNING: Unable to migrate the data in the 'conf' column while in offline mode!
            -- The 'conf' column will be set to NULL in offline mode.
            -- Avoid using offline mode if you need to retain 'conf' values.
            ------------
            """)
        )

    else:
        BATCH_SIZE = 100
        # The raw text() query bypasses SQLAlchemy's JSON result processing.
        # PostgreSQL drivers such as psycopg2 already hand back decoded Python
        # objects for json/jsonb, while SQLite and MySQL drivers return the JSON
        # text. Decode it there so the object is pickled, not its JSON string.
        # NOTE(review): assumes the PostgreSQL driver in use decodes JSON
        # natively (true for psycopg2) -- confirm if another driver is used.
        driver_returns_text = conn.dialect.name != "postgresql"

        # Keyset pagination: each batch resumes after the last id already seen
        # instead of using OFFSET, which makes the database re-scan and discard
        # every previously processed row on each iteration.
        first_page = text(
            f"SELECT id, conf FROM dag_run WHERE conf IS NOT NULL ORDER BY id LIMIT {BATCH_SIZE}"
        )
        next_page = text(
            "SELECT id, conf FROM dag_run "
            "WHERE conf IS NOT NULL AND id > :last_id "
            f"ORDER BY id LIMIT {BATCH_SIZE}"
        )
        update_stmt = text("UPDATE dag_run SET conf_pickle = :pickle_data WHERE id = :id")

        last_id = None
        while True:
            if last_id is None:
                rows = conn.execute(first_page).fetchall()
            else:
                rows = conn.execute(next_page, {"last_id": last_id}).fetchall()
            if not rows:
                break

            for row_id, json_data in rows:
                # Best-effort conversion: only the decoding/pickling is guarded,
                # so a bad value skips that one row without hiding DB failures.
                try:
                    if driver_returns_text and isinstance(json_data, (str, bytes, bytearray)):
                        json_data = json.loads(json_data)
                    pickled_data = pickle.dumps(json_data, protocol=pickle.HIGHEST_PROTOCOL)
                except Exception as e:
                    print(f"Error pickling dagrun conf for dagrun ID {row_id}: {e}")
                    continue
                conn.execute(update_stmt, {"pickle_data": pickled_data, "id": row_id})

            last_id = rows[-1][0]

    op.drop_column("dag_run", "conf")

    op.alter_column("dag_run", "conf_pickle", existing_type=sa.PickleType(), new_column_name="conf")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
f4ad824c8d9ff45e86002506edd83b540a88dab45bb292b1af96cd86dec5ecab | ||
ca59d711e6304f8bfdb25f49339d455602430dd6b880e420869fc892faef0596 |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
During offline migration, ``DagRun.conf`` is cleared | ||
|
||
.. Provide additional contextual information | ||
The ``conf`` column is changing from pickle to JSON; as a result, the values in that column cannot be migrated during offline migrations. If you want to retain ``conf`` values for existing DagRuns, you must do a normal, non-offline, migration. |