Skip to content

Commit b218d2b

Browse files
authored
Fix the mnist_gcp_test.py (kubeflow#741)
* Fix the mnist_gcp_test.py * The job spec was invalid; we were missing container name * There were a bunch of other issues as well. * Pull in the changes from xgboost_synthetic to upload an HTML version of the notebook output to GCS. * Add exceptoin * Revert "Add exceptoin" This reverts commit 44f34d9.
1 parent 5b4b0c6 commit b218d2b

File tree

6 files changed

+137
-31
lines changed

6 files changed

+137
-31
lines changed

prow_config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ workflows:
5656
include_dirs:
5757
- xgboost_synthetic/*
5858
- mnist/*
59+
- py/kubeflow/examples/notebook_tests
5960
- py/kubeflow/examples/create_e2e_workflow.py
6061

6162
# E2E test for various notebooks
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
1-
import fire
1+
import argparse
22
import tempfile
33
import logging
44
import os
55
import subprocess
66

77
logger = logging.getLogger(__name__)
88

9+
from google.cloud import storage
10+
from kubeflow.testing import util
11+
912
def prepare_env():
1013
subprocess.check_call(["pip3", "install", "-U", "papermill"])
11-
subprocess.check_call(["pip3", "install", "-r", "../requirements.txt"])
12-
14+
subprocess.check_call(["pip3", "install", "-U", "nbconvert"])
15+
subprocess.check_call(["pip3", "install", "-U", "nbformat"])
1316

1417
def execute_notebook(notebook_path, parameters=None):
1518
import papermill #pylint: disable=import-error
@@ -21,13 +24,32 @@ def execute_notebook(notebook_path, parameters=None):
2124
log_output=True)
2225
return notebook_output_path
2326

24-
def run_notebook_test(notebook_path, expected_messages, parameters=None):
27+
def _upload_notebook_html(content, target):
28+
gcs_client = storage.Client()
29+
bucket_name, path = util.split_gcs_uri(target)
30+
31+
bucket = gcs_client.get_bucket(bucket_name)
32+
33+
logging.info("Uploading notebook to %s.", target)
34+
blob = bucket.blob(path)
35+
# Need to set content type so that if we browse in GCS we end up rendering
36+
# as html.
37+
blob.upload_from_string(content, content_type="text/html")
38+
39+
def run_notebook_test(notebook_path, parameters=None):
40+
import nbformat #pylint: disable=import-error
41+
import nbconvert #pylint: disable=import-error
42+
2543
output_path = execute_notebook(notebook_path, parameters=parameters)
26-
actual_output = open(output_path, 'r').read()
27-
for expected_message in expected_messages:
28-
if not expected_message in actual_output:
29-
logger.error(actual_output)
30-
assert False, "Unable to find from output: " + expected_message
44+
45+
with open(output_path, "r") as hf:
46+
actual_output = hf.read()
47+
48+
nb = nbformat.reads(actual_output, as_version=4)
49+
html_exporter = nbconvert.HTMLExporter()
50+
(html_output, _) = html_exporter.from_notebook_node(nb)
51+
gcs_path = os.getenv("OUTPUT_GCS")
52+
_upload_notebook_html(html_output, gcs_path)
3153

3254
class NotebookExecutor:
3355
@staticmethod
@@ -40,13 +62,7 @@ def test(notebook_path):
4062
prepare_env()
4163
FILE_DIR = os.path.dirname(__file__)
4264

43-
EXPECTED_MGS = [
44-
"Finished upload of",
45-
"Model export success: mockup-model.dat",
46-
"Pod started running True",
47-
"Cluster endpoint: http:",
48-
]
49-
run_notebook_test(notebook_path, EXPECTED_MGS)
65+
run_notebook_test(notebook_path)
5066

5167
if __name__ == "__main__":
5268
logging.basicConfig(level=logging.INFO,
@@ -55,4 +71,13 @@ def test(notebook_path):
5571
datefmt='%Y-%m-%dT%H:%M:%S',
5672
)
5773

58-
fire.Fire(NotebookExecutor)
74+
# fire isn't available in the notebook image which is why we aren't
75+
# using it.
76+
parser = argparse.ArgumentParser()
77+
parser.add_argument(
78+
"--notebook_path", default="", type=str, help=("Path to the notebook"))
79+
80+
args = parser.parse_args()
81+
82+
NotebookExecutor.test(args.notebook_path)
83+

py/kubeflow/examples/notebook_tests/job.yaml

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# A batch job to run a notebook using papermill.
2+
# The YAML is modified by nb_test_util.py to generate a Job specific
3+
# to a notebook.
4+
#
25
# TODO(jlewi): We should switch to using Tekton
36
apiVersion: batch/v1
47
kind: Job
@@ -36,8 +39,9 @@ spec:
3639
- env:
3740
- name: PYTHONPATH
3841
value: /src/kubeflow/examples/py/
39-
- name: executing-notebooks
42+
name: executing-notebooks
4043
image: execute-image
44+
# Command will get overwritten by nb_test_util.py
4145
command: ["python3", "-m",
4246
"kubeflow.examples.notebook_tests.execute_notebook",
4347
"test", "/src/kubeflow/examples/mnist/mnist_gcp.ipynb"]

py/kubeflow/examples/notebook_tests/mnist_gcp_test.py

+11-7
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
1+
import datetime
12
import logging
23
import os
4+
import uuid
35

46
import pytest
57

68
from kubeflow.examples.notebook_tests import nb_test_util
79
from kubeflow.testing import util
810

9-
# TODO(jlewi): This test is new; there's some work to be done to make it
10-
# reliable. So for now we mark it as expected to fail in presubmits
11-
# We only mark it as expected to fail
12-
# on presubmits because if expected failures don't show up in test grid
13-
# and we want signal in postsubmits and periodics
14-
@pytest.mark.xfail(os.getenv("JOB_TYPE") == "presubmit", reason="Flaky")
1511
def test_mnist_gcp(record_xml_attribute, name, namespace, # pylint: disable=too-many-branches,too-many-statements
1612
repos, image):
1713
'''Generate Job and summit.'''
14+
util.set_pytest_junit(record_xml_attribute, "test_mnist")
15+
16+
if not name:
17+
name = "mnist-" + datetime.datetime.now().strftime("%H%M%S") + "-"
18+
name = name + uuid.uuid4().hex[0:3]
19+
1820
util.set_pytest_junit(record_xml_attribute, "test_mnist_gcp")
19-
nb_test_util.run_papermill_job(name, namespace, repos, image)
21+
22+
notebook_path = "kubeflow/examples/mnist/mnist_gcp.ipynb"
23+
nb_test_util.run_papermill_job(notebook_path, name, namespace, repos, image)
2024

2125

2226
if __name__ == "__main__":

py/kubeflow/examples/notebook_tests/nb_test_util.py

+78-5
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,47 @@
22

33
import datetime
44
import logging
5+
import os
56
import uuid
7+
import tempfile
68
import yaml
79

10+
from google.cloud import storage
811
from kubernetes import client as k8s_client
912
from kubeflow.testing import argo_build_util
13+
from kubeflow.testing import prow_artifacts
1014
from kubeflow.testing import util
1115

12-
def run_papermill_job(name, namespace, # pylint: disable=too-many-branches,too-many-statements
16+
# This is the bucket where the batch jobs will uploaded an HTML version of the
17+
# notebook will be written to. The K8s job is running in a Kubeflow cluster
18+
# so it needs to be a bucket that the kubeflow cluster can write to.
19+
# This is why we don't write directly to the bucket used for prow artifacts
20+
NB_BUCKET = "kubeflow-ci-deployment"
21+
PROJECT = "kbueflow-ci-deployment"
22+
23+
def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many-branches,too-many-statements
1324
repos, image):
1425
"""Generate a K8s job to run a notebook using papermill
1526
1627
Args:
28+
notebook_path: Path to the notebook. This should be in the form
29+
"{REPO_OWNER}/{REPO}/path/to/notebook.ipynb"
1730
name: Name for the K8s job
1831
namespace: The namespace where the job should run.
19-
repos: (Optional) Which repos to checkout; if not specified tries
32+
repos: Which repos to checkout; if None or empty tries
2033
to infer based on PROW environment variables
21-
image:
34+
image: The docker image to run the notebook in.
2235
"""
2336

2437
util.maybe_activate_service_account()
2538

2639
with open("job.yaml") as hf:
2740
job = yaml.load(hf)
2841

42+
if notebook_path.startswith("/"):
43+
raise ValueError("notebook_path={0} should not start with /".format(
44+
notebook_path))
45+
2946
# We need to checkout the correct version of the code
3047
# in presubmits and postsubmits. We should check the environment variables
3148
# for the prow environment variables to get the appropriate values.
@@ -35,23 +52,68 @@ def run_papermill_job(name, namespace, # pylint: disable=too-many-branches,too-m
3552
if not repos:
3653
repos = argo_build_util.get_repo_from_prow_env()
3754

55+
if not repos:
56+
raise ValueError("Could not get repos from prow environment variable "
57+
"and --repos isn't explicitly set")
58+
59+
repos += ",kubeflow/testing@HEAD"
60+
3861
logging.info("Repos set to %s", repos)
3962
job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [
4063
"/usr/local/bin/checkout_repos.sh",
4164
"--repos=" + repos,
4265
"--src_dir=/src",
4366
"--depth=all",
4467
]
68+
4569
job["spec"]["template"]["spec"]["containers"][0]["image"] = image
70+
71+
full_notebook_path = os.path.join("/src", notebook_path)
72+
job["spec"]["template"]["spec"]["containers"][0]["command"] = [
73+
"python3", "-m",
74+
"kubeflow.examples.notebook_tests.execute_notebook",
75+
"--notebook_path", full_notebook_path]
76+
77+
job["spec"]["template"]["spec"]["containers"][0][
78+
"workingDir"] = os.path.dirname(full_notebook_path)
79+
80+
# The prow bucket to use for results/artifacts
81+
prow_bucket = prow_artifacts.PROW_RESULTS_BUCKET
82+
83+
if os.getenv("REPO_OWNER") and os.getenv("REPO_NAME"):
84+
# Running under prow
85+
prow_dir = prow_artifacts.get_gcs_dir(prow_bucket)
86+
prow_dir = os.path.join(prow_dir, "artifacts")
87+
88+
if os.getenv("TEST_TARGET_NAME"):
89+
prow_dir = os.path.join(
90+
prow_dir, os.getenv("TEST_TARGET_NAME").lstrip("/"))
91+
prow_bucket, prow_path = util.split_gcs_uri(prow_dir)
92+
93+
else:
94+
prow_path = "notebook-test" + datetime.datetime.now().strftime("%H%M%S")
95+
prow_path = prow_path + "-" + uuid.uuid4().hex[0:3]
96+
prow_dir = util.to_gcs_uri(prow_bucket, prow_path)
97+
98+
prow_path = os.path.join(prow_path, name + ".html")
99+
output_gcs = util.to_gcs_uri(NB_BUCKET, prow_path)
100+
101+
job["spec"]["template"]["spec"]["containers"][0]["env"] = [
102+
{"name": "OUTPUT_GCS", "value": output_gcs},
103+
{"name": "PYTHONPATH",
104+
"value": "/src/kubeflow/testing/py:/src/kubeflow/examples/py"},
105+
]
106+
107+
logging.info("Notebook will be written to %s", output_gcs)
46108
util.load_kube_config(persist_config=False)
47109

48110
if name:
49111
job["metadata"]["name"] = name
50112
else:
51-
job["metadata"]["name"] = ("xgboost-test-" +
113+
job["metadata"]["name"] = ("notebook-test-" +
52114
datetime.datetime.now().strftime("%H%M%S")
53115
+ "-" + uuid.uuid4().hex[0:3])
54-
name = job["metadata"]["name"]
116+
name = job["metadata"]["name"]
55117

56118
job["metadata"]["namespace"] = namespace
57119

@@ -70,6 +132,16 @@ def run_papermill_job(name, namespace, # pylint: disable=too-many-branches,too-m
70132

71133
logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))
72134

135+
# Download notebook html to artifacts
136+
logging.info("Copying %s to bucket %s", output_gcs, prow_bucket)
137+
138+
storage_client = storage.Client()
139+
bucket = storage_client.get_bucket(NB_BUCKET)
140+
blob = bucket.get_blob(prow_path)
141+
142+
destination_bucket = storage_client.get_bucket(prow_bucket)
143+
bucket.copy_blob(blob, destination_bucket)
144+
73145
if not final_job.status.conditions:
74146
raise RuntimeError("Job {0}.{1}; did not complete".format(namespace, name))
75147

@@ -78,3 +150,4 @@ def run_papermill_job(name, namespace, # pylint: disable=too-many-branches,too-m
78150
if last_condition.type not in ["Complete"]:
79151
logging.error("Job didn't complete successfully")
80152
raise RuntimeError("Job {0}.{1} failed".format(namespace, name))
153+

xgboost_synthetic/testing/execute_notebook.py

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ def prepare_env():
1313
subprocess.check_call(["pip3", "install", "-U", "nbformat"])
1414
subprocess.check_call(["pip3", "install", "-r", "../requirements.txt"])
1515

16-
1716
def execute_notebook(notebook_path, parameters=None):
1817
temp_dir = tempfile.mkdtemp()
1918
notebook_output_path = os.path.join(temp_dir, "out.ipynb")

0 commit comments

Comments
 (0)