From d97946c68d4b22f8fc243e35d0f7837c47024d51 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 8 Dec 2023 13:17:20 -0500 Subject: [PATCH 1/2] tapback: Move backend to InitWait earlier This is needed for libxl integration. tapdisk waits to transition to InitWait until after "hotplug-status" is connected. However libxl doesn't run its hotplug scripts, which write "hotplug-status", until the backend transitions to InitWait. With both sides waiting, progress is not made and connecting the blktap device times out and fails. Make tapback transition to InitWait earlier to resolve this issue under libxl. Place the transition to InitWait in tapback_backend_create_device() after the xenstore feature nodes have been written. InitWait is a signal to the frontend that such nodes have been written. This matches blkback's behaviour. It should also be fine since tapback still won't advance to Connected without the other setup like physical-device-path and hotplug-status. VBDs can be reconnected. When pv-grub is used, it connects the VBD, loads the kernel, disconnects the VBD. It then re-sets the frontend state to XenbusStateInitialising so that the new kernel will find and connect the VBD. tapback and blkback handle this case differently. When blkback observes the frontend transition to XenbusStateInitialising, and the backend is XenbusStateClosed, the backend transitions to XenbusStateInitWait. When tapback observes the frontend transition to XenbusStateInitialising, the backend checks for hotplug_status_connected to be true before switching XenbusStateInitWait. For tapback, this serves a second purpose for setting XenbusStateInitWait initially as well. With tapback setting XenbusStateInitWait earlier, follow the blkback model to transition to XenbusStateInitWait when the backend was previously in XenbusStateClosed. The end result shouldn't change, but it will remove an extra write and watch trigger for XenbusStateInitWait. Signed-off-by: Jason Andryuk --- tapback/backend.c | 9 +++++++++ tapback/frontend.c | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tapback/backend.c b/tapback/backend.c index facae9bfd..e8dcf1bb5 100644 --- a/tapback/backend.c +++ b/tapback/backend.c @@ -259,6 +259,15 @@ tapback_backend_create_device(backend_t *backend, goto out; } + /* After tapback has written it's capabilities to XenStore, switch to + * InitWait. */ + err = xenbus_switch_state(device, XenbusStateInitWait); + if (unlikely(err)) { + WARN(device, "failed to switch to XenbusStateInitWait: %s\n", + strerror(-err)); + goto out; + } + out: if (err) { WARN(NULL, "%s: error creating device: %s\n", name, strerror(-err)); diff --git a/tapback/frontend.c b/tapback/frontend.c index e086813e6..ab0d393e3 100644 --- a/tapback/frontend.c +++ b/tapback/frontend.c @@ -439,8 +439,10 @@ frontend_changed(vbd_t * const device, const XenbusState state) switch (state) { case XenbusStateInitialising: - if (device->hotplug_status_connected) - err = xenbus_switch_state(device, XenbusStateInitWait); + if (device->state == XenbusStateClosed) { + DBG(device, "prepare for reconnect\n"); + err = xenbus_switch_state(device, XenbusStateInitWait); + } break; case XenbusStateInitialised: case XenbusStateConnected: From 89018c2bde8031c97bf5c368df08dc86cf206b7c Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Tue, 25 Apr 2023 12:59:20 -0400 Subject: [PATCH 2/2] tapback: Don't remove xenstore backend libxl doesn't clean up tapdisks because it doesn't call the hotplug cleanup scripts: libxl: debug: libxl_event.c:1043:devstate_callback: backend /local/domain/0/backend/vbd3/5/2048/state wanted state 6 but it was removed libxl: debug: libxl_event.c:849:libxl__ev_xswatch_deregister: watch w=0xf82ba0 wpath=/local/domain/0/backend/vbd3/5/2048/state token=1/2: deregister slotnum=1 libxl: debug: libxl_device.c:1156:device_backend_callback: Domain 5:calling device_backend_cleanup libxl: debug: libxl_event.c:863:libxl__ev_xswatch_deregister: watch w=0xf82ba0: deregister unregistered libxl: error: libxl_device.c:1169:device_backend_callback: Domain 5:unable to remove device with path /local/domain/0/backend/vbd3/5/2048 - rc -6 The backend state cannot be found because tapback deleted the entire backend subtree. tapback shouldn't remove the backend nodes when the frontend is removed, because the nodes contain the information on how to clean up. Leave the nodes and allowed them to be removed by the toolstack. Signed-off-by: Jason Andryuk --- tapback/frontend.c | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/tapback/frontend.c b/tapback/frontend.c index ab0d393e3..2982f9a74 100644 --- a/tapback/frontend.c +++ b/tapback/frontend.c @@ -479,7 +479,7 @@ tapback_backend_handle_otherend_watch(backend_t *backend, { vbd_t *device = NULL; int err = 0, state = 0; - char *s = NULL, *end = NULL, *_path = NULL; + char *s = NULL, *end = NULL; ASSERT(backend); ASSERT(path); @@ -510,41 +510,19 @@ tapback_backend_handle_otherend_watch(backend_t *backend, */ s = tapback_xs_read(device->backend->xs, XBT_NULL, "%s", device->frontend_state_path); - if (!s) { - err = errno; - /* - * If the front-end XenBus node is missing, the XenBus device has been - * removed: remove the XenBus back-end node. - */ - if (err == ENOENT) { - err = asprintf(&_path, "%s/%s/%d/%d", XENSTORE_BACKEND, - device->backend->name, device->domid, device->devid); - if (err == -1) { - err = errno; - WARN(device, "failed to asprintf: %s\n", strerror(err)); - goto out; - } - err = 0; - if (!xs_rm(device->backend->xs, XBT_NULL, _path)) { - if (errno != ENOENT) { - err = errno; - WARN(device, "failed to remove %s: %s\n", path, - strerror(err)); - } - } - } - } else { + if (s) { state = strtol(s, &end, 0); if (*end != 0 || end == s) { WARN(device, "invalid XenBus state '%s'\n", s); err = EINVAL; } else err = frontend_changed(device, state); + } else { + WARN(device, "frontend disappeared!"); + err = ENOENT; } -out: free(s); - free(_path); return err; }