Skip to content

Commit 373da48

Browse files
committed
orchestratord test: Change upgrade order
1 parent 7bee63d commit 373da48

File tree

2 files changed

+94
-63
lines changed

2 files changed

+94
-63
lines changed

ci/nightly/pipeline.template.yml

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,6 +2367,7 @@ steps:
23672367
steps:
23682368
- id: orchestratord-defaults
23692369
label: "Orchestratord test (defaults from documentation)"
2370+
artifact_paths: ["mz_debug_*.zip"]
23702371
depends_on: devel-docker-tags
23712372
timeout_in_minutes: 120
23722373
plugins:
@@ -2379,6 +2380,7 @@ steps:
23792380

23802381
- id: orchestratord-default-properties
23812382
label: "Orchestratord test (defaults for properties)"
2383+
artifact_paths: ["mz_debug_*.zip"]
23822384
depends_on: devel-docker-tags
23832385
timeout_in_minutes: 120
23842386
plugins:
@@ -2391,6 +2393,7 @@ steps:
23912393

23922394
- id: orchestratord-individual
23932395
label: "Orchestratord test (individual properties)"
2396+
artifact_paths: ["mz_debug_*.zip"]
23942397
depends_on: devel-docker-tags
23952398
timeout_in_minutes: 120
23962399
plugins:
@@ -2403,6 +2406,7 @@ steps:
24032406

24042407
- id: orchestratord-combine
24052408
label: "Orchestratord test (combine properties)"
2409+
artifact_paths: ["mz_debug_*.zip"]
24062410
depends_on: build-aarch64
24072411
timeout_in_minutes: 120
24082412
plugins:
@@ -2415,64 +2419,52 @@ steps:
24152419

24162420
- id: orchestratord-upgrade-individual
24172421
label: "Orchestratord test (upgrade, individual props)"
2422+
artifact_paths: ["mz_debug_*.zip"]
24182423
depends_on: devel-docker-tags
24192424
timeout_in_minutes: 120
24202425
plugins:
24212426
- ./ci/plugins/mzcompose:
24222427
composition: orchestratord
24232428
args: [--action=upgrade, --properties=individual, --runtime=3600, --recreate-cluster]
24242429
ci-builder: stable
2425-
env:
2426-
# Old versions are not on GHCR yet
2427-
MZ_GHCR: 0
24282430
agents:
24292431
queue: hetzner-aarch64-8cpu-16gb
2430-
skip: "https://github.com/MaterializeInc/materialize/pull/34214"
24312432

24322433
- id: orchestratord-upgrade-combine
24332434
label: "Orchestratord test (upgrade, combine props)"
2435+
artifact_paths: ["mz_debug_*.zip"]
24342436
depends_on: devel-docker-tags
24352437
timeout_in_minutes: 120
24362438
plugins:
24372439
- ./ci/plugins/mzcompose:
24382440
composition: orchestratord
24392441
args: [--action=upgrade, --properties=combine, --runtime=3600, --recreate-cluster]
24402442
ci-builder: stable
2441-
env:
2442-
# Old versions are not on GHCR yet
2443-
MZ_GHCR: 0
24442443
agents:
24452444
queue: hetzner-aarch64-8cpu-16gb
2446-
skip: "https://github.com/MaterializeInc/materialize/pull/34214"
24472445

24482446
- id: orchestratord-upgrade-chain-individual
24492447
label: "Orchestratord test (upgrade chain, individual props)"
2448+
artifact_paths: ["mz_debug_*.zip"]
24502449
depends_on: devel-docker-tags
24512450
timeout_in_minutes: 120
24522451
plugins:
24532452
- ./ci/plugins/mzcompose:
24542453
composition: orchestratord
24552454
args: [--action=upgrade-chain, --properties=individual, --runtime=3600, --recreate-cluster]
24562455
ci-builder: stable
2457-
env:
2458-
# Old versions are not on GHCR yet
2459-
MZ_GHCR: 0
24602456
agents:
24612457
queue: hetzner-aarch64-8cpu-16gb
2462-
skip: "https://github.com/MaterializeInc/materialize/pull/34214"
24632458

24642459
- id: orchestratord-upgrade-chain-combine
24652460
label: "Orchestratord test (upgrade chain, combine props)"
2461+
artifact_paths: ["mz_debug_*.zip"]
24662462
depends_on: devel-docker-tags
24672463
timeout_in_minutes: 120
24682464
plugins:
24692465
- ./ci/plugins/mzcompose:
24702466
composition: orchestratord
24712467
args: [--action=upgrade-chain, --properties=combine, --runtime=3600, --recreate-cluster]
24722468
ci-builder: stable
2473-
env:
2474-
# Old versions are not on GHCR yet
2475-
MZ_GHCR: 0
24762469
agents:
24772470
queue: hetzner-aarch64-16cpu-32gb
2478-
skip: "https://github.com/MaterializeInc/materialize/pull/34214"

test/orchestratord/mzcompose.py

Lines changed: 86 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@
3030
import yaml
3131
from semver.version import Version
3232

33-
from materialize import MZ_ROOT, ci_util, git, spawn, ui
34-
from materialize.docker import MZ_GHCR_DEFAULT
33+
from materialize import MZ_ROOT, ci_util, git, spawn
3534
from materialize.mz_version import MzVersion
3635
from materialize.mzcompose.composition import (
3736
Composition,
@@ -41,6 +40,7 @@
4140
from materialize.mzcompose.services.balancerd import Balancerd
4241
from materialize.mzcompose.services.clusterd import Clusterd
4342
from materialize.mzcompose.services.environmentd import Environmentd
43+
from materialize.mzcompose.services.mz_debug import MzDebug
4444
from materialize.mzcompose.services.orchestratord import Orchestratord
4545
from materialize.mzcompose.services.testdrive import Testdrive
4646
from materialize.util import all_subclasses
@@ -55,9 +55,26 @@
5555
Environmentd(),
5656
Clusterd(),
5757
Balancerd(),
58+
MzDebug(),
5859
]
5960

6061

62+
def run_mz_debug() -> None:
63+
# TODO: Disabled because mz-debug hangs a lot in CI; re-enable once that is fixed
64+
# Only using capture because it's too noisy
65+
# spawn.capture(
66+
# [
67+
# "./mz-debug",
68+
# "self-managed",
69+
# "--k8s-namespace",
70+
# "materialize-environment",
71+
# "--mz-instance-name",
72+
# "12345678-1234-1234-1234-123456789012",
73+
# ]
74+
# )
75+
pass
76+
77+
6178
def get_tag(tag: str | None = None) -> str:
6279
# We can't use the mzbuild tag because it has a different fingerprint for
6380
# environmentd/clusterd/balancerd and the orchestratord depends on them
@@ -498,14 +515,9 @@ def validate(self, mods: dict[type[Modification], Any]) -> None:
498515
def check() -> None:
499516
environmentd = get_environmentd_data()
500517
image = environmentd["items"][0]["spec"]["containers"][0]["image"]
501-
image_registry = (
502-
"ghcr.io/materializeinc/materialize"
503-
if ui.env_is_truthy("MZ_GHCR", MZ_GHCR_DEFAULT)
504-
else "materialize"
505-
)
506-
expected = f"{image_registry}/environmentd:{self.value}"
518+
expected = f"materialize/environmentd:{self.value}"
507519
assert (
508-
image == expected
520+
image == expected or f"ghcr.io/materializeinc/{image}" == expected
509521
), f"Expected environmentd image {expected}, but found {image}"
510522

511523
retry(check, 240)
@@ -1070,11 +1082,11 @@ def check_pods() -> None:
10701082
class AuthenticatorKind(Modification):
10711083
@classmethod
10721084
def values(cls, version: MzVersion) -> list[Any]:
1073-
# Test None, Password (v0.147.7+), and Sasl (v0.147.16+)
1085+
# Test None, Password (v0.147.7+), and Sasl (v26.0.0+)
10741086
result = ["None"]
10751087
if version >= MzVersion.parse_mz("v0.147.7"):
10761088
result.append("Password")
1077-
if version >= MzVersion.parse_mz("v0.147.16"):
1089+
if version >= MzVersion.parse_mz("v26.0.0"):
10781090
result.append("Sasl")
10791091
return result
10801092

@@ -1100,13 +1112,13 @@ def validate(self, mods: dict[type[Modification], Any]) -> None:
11001112
if self.value == "Password" and version <= MzVersion.parse_mz("v0.147.6"):
11011113
return
11021114

1103-
if self.value == "Sasl" and version < MzVersion.parse_mz("v0.147.16"):
1115+
if self.value == "Sasl" and version < MzVersion.parse_mz("v26.0.0"):
11041116
return
11051117

11061118
port = (
11071119
6875
11081120
if (version >= MzVersion.parse_mz("v0.147.0") and self.value == "Password")
1109-
or (version >= MzVersion.parse_mz("v0.147.16") and self.value == "Sasl")
1121+
or (version >= MzVersion.parse_mz("v26.0.0") and self.value == "Sasl")
11101122
else 6877
11111123
)
11121124
for i in range(120):
@@ -1252,10 +1264,23 @@ def workflow_defaults(c: Composition, parser: WorkflowArgumentParser) -> None:
12521264
)
12531265
args = parser.parse_args()
12541266

1255-
current_version = get_tag(args.tag)
1267+
c.up(Service("mz-debug", idle=True))
1268+
c.invoke("cp", "mz-debug:/usr/local/bin/mz-debug", ".")
1269+
1270+
current_version = get_version(args.tag)
12561271

12571272
# Following https://materialize.com/docs/installation/install-on-local-kind/
1258-
for version in reversed(get_self_managed_versions() + [get_version(args.tag)]):
1273+
# orchestratord test can't run against future versions, so ignore those
1274+
versions = reversed(
1275+
[
1276+
version
1277+
for version in get_self_managed_versions()
1278+
if version < current_version
1279+
]
1280+
+ [current_version]
1281+
)
1282+
for version in versions:
1283+
print(f"--- Running with defaults against {version}")
12591284
dir = "my-local-mz"
12601285
if os.path.exists(dir):
12611286
shutil.rmtree(dir)
@@ -1392,9 +1417,6 @@ def workflow_defaults(c: Composition, parser: WorkflowArgumentParser) -> None:
13921417
materialize_setup = list(yaml.load_all(f, Loader=yaml.Loader))
13931418
assert len(materialize_setup) == 3
13941419

1395-
print(version)
1396-
print(current_version)
1397-
print(version == current_version)
13981420
if version == current_version:
13991421
materialize_setup[2]["spec"][
14001422
"environmentdImageRef"
@@ -1493,6 +1515,7 @@ def workflow_defaults(c: Composition, parser: WorkflowArgumentParser) -> None:
14931515
]
14941516
)
14951517
raise ValueError("Never completed")
1518+
run_mz_debug()
14961519

14971520

14981521
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
@@ -1534,7 +1557,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
15341557
"0.29.0"
15351558
), f"kind >= v0.29.0 required, while you are on {kind_version}"
15361559

1537-
c.up(Service("testdrive", idle=True))
1560+
c.up(Service("testdrive", idle=True), Service("mz-debug", idle=True))
1561+
c.invoke("cp", "mz-debug:/usr/local/bin/mz-debug", ".")
15381562

15391563
cluster = "kind"
15401564
clusters = spawn.capture(["kind", "get", "clusters"]).strip().split("\n")
@@ -1658,9 +1682,6 @@ def get_mods() -> Iterator[list[Modification]]:
16581682
mods.append(EnvironmentdImageRef(str(args.tag)))
16591683
run_scenario([mods], definition)
16601684
elif action == Action.Upgrade:
1661-
assert not ui.env_is_truthy(
1662-
"MZ_GHCR", MZ_GHCR_DEFAULT
1663-
), "Manually set MZ_GHCR=0 as an environment variable for upgrade testing"
16641685
assert args.runtime
16651686
end_time = (
16661687
datetime.datetime.now() + datetime.timedelta(seconds=args.runtime)
@@ -1683,9 +1704,6 @@ def get_mods() -> Iterator[list[Modification]]:
16831704
]
16841705
run_scenario(scenario, definition)
16851706
elif action == Action.UpgradeChain:
1686-
assert not ui.env_is_truthy(
1687-
"MZ_GHCR", MZ_GHCR_DEFAULT
1688-
), "Manually set MZ_GHCR=0 as an environment variable for upgrade testing"
16891707
assert args.runtime
16901708
end_time = (
16911709
datetime.datetime.now() + datetime.timedelta(seconds=args.runtime)
@@ -1806,18 +1824,14 @@ def run_scenario(
18061824
mod.modify(definition)
18071825
if mod.value in mod.failed_reconciliation_values():
18081826
expect_fail = True
1809-
if not initialize:
1810-
definition["materialize"]["spec"][
1811-
"rolloutStrategy"
1812-
] = "ImmediatelyPromoteCausingDowntime"
1813-
definition["materialize"]["spec"]["requestRollout"] = str(uuid.uuid4())
1814-
run(definition, expect_fail)
18151827
if initialize:
18161828
init(definition)
18171829
run(definition, expect_fail)
18181830
initialize = False # only initialize once
18191831
else:
1820-
upgrade(definition, expect_fail)
1832+
upgrade_operator_helm_chart(definition, expect_fail)
1833+
definition["materialize"]["spec"]["requestRollout"] = str(uuid.uuid4())
1834+
run(definition, expect_fail)
18211835
mod_dict = {mod.__class__: mod.value for mod in mods}
18221836
for subclass in all_subclasses(Modification):
18231837
if subclass not in mod_dict:
@@ -1831,6 +1845,9 @@ def run_scenario(
18311845
f"Reproduce with bin/mzcompose --find orchestratord run default --recreate-cluster --scenario='{scenario_json}'"
18321846
)
18331847
raise
1848+
finally:
1849+
if not expect_fail:
1850+
run_mz_debug()
18341851

18351852

18361853
def init(definition: dict[str, Any]) -> None:
@@ -1890,7 +1907,7 @@ def init(definition: dict[str, Any]) -> None:
18901907
raise ValueError("Never completed")
18911908

18921909

1893-
def upgrade(definition: dict[str, Any], expect_fail: bool) -> None:
1910+
def upgrade_operator_helm_chart(definition: dict[str, Any], expect_fail: bool) -> None:
18941911
spawn.runv(
18951912
[
18961913
"helm",
@@ -1907,7 +1924,6 @@ def upgrade(definition: dict[str, Any], expect_fail: bool) -> None:
19071924
stdout=subprocess.DEVNULL,
19081925
stderr=subprocess.DEVNULL,
19091926
)
1910-
post_run_check(definition, expect_fail)
19111927

19121928

19131929
def run(definition: dict[str, Any], expect_fail: bool) -> None:
@@ -1930,23 +1946,48 @@ def run(definition: dict[str, Any], expect_fail: bool) -> None:
19301946

19311947

19321948
def post_run_check(definition: dict[str, Any], expect_fail: bool) -> None:
1933-
for i in range(60):
1949+
for i in range(900):
19341950
try:
1935-
spawn.capture(
1936-
[
1937-
"kubectl",
1938-
"get",
1939-
"materializes",
1940-
"-n",
1941-
"materialize-environment",
1942-
],
1943-
stderr=subprocess.DEVNULL,
1951+
data = json.loads(
1952+
spawn.capture(
1953+
[
1954+
"kubectl",
1955+
"get",
1956+
"materializes",
1957+
"-n",
1958+
"materialize-environment",
1959+
"-o",
1960+
"json",
1961+
],
1962+
stderr=subprocess.DEVNULL,
1963+
)
19441964
)
1945-
break
1965+
status = data["items"][0].get("status")
1966+
if not status:
1967+
continue
1968+
if (
1969+
not status["conditions"]
1970+
or status["conditions"][0]["type"] != "UpToDate"
1971+
):
1972+
continue
1973+
if (
1974+
status["lastCompletedRolloutRequest"]
1975+
== data["items"][0]["spec"]["requestRollout"]
1976+
):
1977+
break
19461978
except subprocess.CalledProcessError:
19471979
pass
19481980
time.sleep(1)
19491981
else:
1982+
spawn.runv(
1983+
[
1984+
"kubectl",
1985+
"get",
1986+
"materializes",
1987+
"-n",
1988+
"materialize-environment",
1989+
],
1990+
)
19501991
raise ValueError("Never completed")
19511992

19521993
for i in range(480):
@@ -2005,5 +2046,3 @@ def post_run_check(definition: dict[str, Any], expect_fail: bool) -> None:
20052046
]
20062047
)
20072048
raise ValueError("Never completed")
2008-
# Wait a bit for the status to stabilize
2009-
time.sleep(60)

0 commit comments

Comments
 (0)