When trying to migrate an SPDK NVMf/vfio-user target, which creates an NVMe controller with one namespace in the guest (/dev/nvme0n1), the destination QEMU fails with:
Unable to write to socket: Bad file descriptor
Using the following:
- QEMU: https://github.com/oracle/qemu/tree/vfio-user-v0.9
- libvfio-user: drop mappable flag from DMA map nutanix/libvfio-user#553
- SPDK: https://review.spdk.io/gerrit/c/spdk/spdk/+/7617
Debugging further, this happens here:
#0 0x0000555555cb8eff in qio_channel_socket_writev (ioc=0x5555568a2400, iov=0x7fffec35da90, niov=1, fds=0x555557314934, nfds=3, errp=0x7fffec35da70) at ../io/channel-socket.c:571
#1 0x0000555555cb2627 in qio_channel_writev_full (ioc=0x5555568a2400, iov=0x7fffec35da90, niov=1, fds=0x555557314934, nfds=3, errp=0x7fffec35da70) at ../io/channel.c:86
#2 0x0000555555c812d5 in vfio_user_send_locked (proxy=0x5555575af7e0, msg=0x55555747eb70, fds=0x7fffec35db40) at ../hw/vfio/user.c:278
#3 0x0000555555c815c9 in vfio_user_send_recv (proxy=0x5555575af7e0, msg=0x55555747eb70, fds=0x7fffec35db40, rsize=0) at ../hw/vfio/user.c:351
#4 0x0000555555c82c38 in vfio_user_set_irqs (vbasedev=0x5555575a9c70, irq=0x555557314920) at ../hw/vfio/user.c:898
#5 0x0000555555c6b79d in vfio_enable_vectors (vdev=0x5555575a9370, msix=true) at ../hw/vfio/pci.c:413
#6 0x0000555555c6bb4c in vfio_msix_vector_do_use (pdev=0x5555575a9370, nr=3, msg=0x0, handler=0x0) at ../hw/vfio/pci.c:516
#7 0x0000555555c6be8c in vfio_msix_enable (vdev=0x5555575a9370) at ../hw/vfio/pci.c:615
#8 0x0000555555c70b0b in vfio_pci_load_config (vbasedev=0x5555575a9c70, f=0x5555568f5af0) at ../hw/vfio/pci.c:2528
#9 0x0000555555bab3df in vfio_load_device_config_state (f=0x5555568f5af0, opaque=0x5555575a9c70) at ../hw/vfio/migration.c:382
#10 0x0000555555babbe2 in vfio_load_state (f=0x5555568f5af0, opaque=0x5555575a9c70, version_id=1) at ../hw/vfio/migration.c:649
#11 0x00005555558a5cb9 in vmstate_load (f=0x5555568f5af0, se=0x555556964df0) at ../migration/savevm.c:908
#12 0x00005555558a8dec in qemu_loadvm_section_start_full (f=0x5555568f5af0, mis=0x5555568cec70) at ../migration/savevm.c:2433
#13 0x00005555558a944a in qemu_loadvm_state_main (f=0x5555568f5af0, mis=0x5555568cec70) at ../migration/savevm.c:2619
#14 0x00005555558a95c5 in qemu_loadvm_state (f=0x5555568f5af0) at ../migration/savevm.c:2698
#15 0x00005555558e437d in process_incoming_migration_co (opaque=0x0) at ../migration/migration.c:555
#16 0x0000555555e28cb6 in coroutine_trampoline (i0=1457783792, i1=21845) at ../util/coroutine-ucontext.c:173
#17 0x00007ffff75a4b50 in __correctly_grouped_prefixwc (begin=0x7fffec35da70 L"\x56965b50啕\003", end=0x0, thousands=-175363960 L'\xf58c2888', grouping=0x555556650010 "") at grouping.c:171
#18 0x0000000000000000 in ()
(gdb) p errno
$2 = 9
(gdb) p sioc->fd
$3 = 13
Looking at the FD:
# ls -lh /proc/1816/fd/13
lrwx------ 1 root root 64 Jun 8 11:43 /proc/1816/fd/13 -> 'socket:[30949]'
# cat /proc/1816/fdinfo/13
pos: 0
flags: 02000002
mnt_id: 10
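As a side note (my speculation, not something confirmed above): the failing call passes nfds=3 ancillary descriptors, and on Linux sendmsg() also returns EBADF when one of the file descriptors placed in an SCM_RIGHTS control message is invalid, even if the socket fd itself (fd 13 here) is perfectly healthy. So the errno 9 may point at one of the eventfds that vfio_user_set_irqs() forwards rather than at the proxy socket. A minimal standalone sketch of that behaviour, with a deliberately closed fd standing in for the forwarded eventfds:

/*
 * Sketch only: shows sendmsg() failing with EBADF because the fd carried
 * in the SCM_RIGHTS control message is invalid, while the socket fd used
 * for the send is open and valid.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>

int main(void)
{
    int sv[2];
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
        perror("socketpair");
        return 1;
    }

    /* Make an fd that is deliberately invalid (already closed). */
    int bogus = dup(0);
    close(bogus);

    char payload = 'x';
    struct iovec iov = { .iov_base = &payload, .iov_len = 1 };

    char control[CMSG_SPACE(sizeof(int))];
    memset(control, 0, sizeof(control));

    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = sizeof(control),
    };

    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
    memcpy(CMSG_DATA(cmsg), &bogus, sizeof(int));

    if (sendmsg(sv[0], &msg, 0) < 0) {
        /* Prints "sendmsg: Bad file descriptor" (errno 9) on Linux. */
        perror("sendmsg");
    }
    return 0;
}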
The source QEMU is run as follows:
/opt/qemu/bin/qemu-system-x86_64 -smp 4 -nographic -m 2G -object memory-backend-file,id=mem0,size=2G,mem-path=/dev/hugepages,share=on,prealloc=yes, -numa node,memdev=mem0 -kernel bionic-server-cloudimg-amd64-vmlinuz-generic -initrd bionic-server-cloudimg-amd64-initrd-generic -append console=ttyS0 root=/dev/sda1 single intel_iommu=on -hda bionic-server-cloudimg-amd64-0.raw -hdb nvme.img -nic user,model=virtio-net-pci -machine pc-q35-3.1 -device vfio-user-pci,socket=/var/run/vfio-user.sock,x-enable-migration=on -D qemu.out -trace enable=vfio*
and the destination QEMU:
/opt/qemu/bin/qemu-system-x86_64 -smp 4 -nographic -m 2G -object memory-backend-file,id=mem0,size=2G,mem-path=/dev/hugepages,share=on,prealloc=yes, -numa node,memdev=mem0 -kernel bionic-server-cloudimg-amd64-vmlinuz-generic -initrd bionic-server-cloudimg-amd64-initrd-generic -append console=ttyS0 root=/dev/sda1 single intel_iommu=on -hda bionic-server-cloudimg-amd64-0.raw -hdb nvme.img -nic user,model=virtio-net-pci -machine pc-q35-3.1 -device vfio-user-pci,socket=/var/run/vfio-user.sock,x-enable-migration=on -D qemu.out -trace enable=vfio* -incoming tcp:0:4444
I migrate using:
migrate -d tcp:<IP address>:4444
In the source QEMU log:
vfio_msi_interrupt (VFIO user </var/run/vfio-user.sock>) vector 2 0xfee04004/0x4023
vfio_get_dirty_bitmap container fd=-1, iova=0x0 size= 0xa0000 bitmap_size=0x18 start=0x0
vfio_get_dirty_bitmap container fd=-1, iova=0xc0000 size= 0xb000 bitmap_size=0x8 start=0xc0000
vfio_get_dirty_bitmap container fd=-1, iova=0xcb000 size= 0x3000 bitmap_size=0x8 start=0xcb000
vfio_get_dirty_bitmap container fd=-1, iova=0xce000 size= 0x1e000 bitmap_size=0x8 start=0xce000
vfio_msi_interrupt (VFIO user </var/run/vfio-user.sock>) vector 2 0xfee04004/0x4023
vfio_get_dirty_bitmap container fd=-1, iova=0xec000 size= 0x4000 bitmap_size=0x8 start=0xec000
vfio_get_dirty_bitmap container fd=-1, iova=0xf0000 size= 0x10000 bitmap_size=0x8 start=0xf0000
vfio_get_dirty_bitmap container fd=-1, iova=0x100000 size= 0x7ff00000 bitmap_size=0xffe0 start=0x100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfd000000 size= 0x1000000 bitmap_size=0x200 start=0x80080000
vfio_get_dirty_bitmap container fd=-1, iova=0xfebd1000 size= 0x1000 bitmap_size=0x8 start=0x81100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfffc0000 size= 0x40000 bitmap_size=0x8 start=0x80000000
vfio_update_pending (VFIO user </var/run/vfio-user.sock>) pending 0x8000
vfio_save_pending (VFIO user </var/run/vfio-user.sock>) precopy 0x1195000 postcopy 0x0 compatible 0x0
vfio_migration_set_state (VFIO user </var/run/vfio-user.sock>) state 2
vfio_vmstate_change (VFIO user </var/run/vfio-user.sock>) running 0 reason finish-migrate device state 2
vfio_get_dirty_bitmap container fd=-1, iova=0x0 size= 0xa0000 bitmap_size=0x18 start=0x0
vfio_get_dirty_bitmap container fd=-1, iova=0xc0000 size= 0xb000 bitmap_size=0x8 start=0xc0000
vfio_get_dirty_bitmap container fd=-1, iova=0xcb000 size= 0x3000 bitmap_size=0x8 start=0xcb000
vfio_get_dirty_bitmap container fd=-1, iova=0xce000 size= 0x1e000 bitmap_size=0x8 start=0xce000
vfio_get_dirty_bitmap container fd=-1, iova=0xec000 size= 0x4000 bitmap_size=0x8 start=0xec000
vfio_get_dirty_bitmap container fd=-1, iova=0xf0000 size= 0x10000 bitmap_size=0x8 start=0xf0000
vfio_get_dirty_bitmap container fd=-1, iova=0x100000 size= 0x7ff00000 bitmap_size=0xffe0 start=0x100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfd000000 size= 0x1000000 bitmap_size=0x200 start=0x80080000
vfio_get_dirty_bitmap container fd=-1, iova=0xfebd1000 size= 0x1000 bitmap_size=0x8 start=0x81100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfffc0000 size= 0x40000 bitmap_size=0x8 start=0x80000000
vfio_migration_set_state (VFIO user </var/run/vfio-user.sock>) state 2
vfio_update_pending (VFIO user </var/run/vfio-user.sock>) pending 0x8000
vfio_save_buffer (VFIO user </var/run/vfio-user.sock>) Offset 0x1000 size 0x8000 pending 0x8000
vfio_update_pending (VFIO user </var/run/vfio-user.sock>) pending 0x8000
vfio_save_buffer (VFIO user </var/run/vfio-user.sock>) Offset 0x9000 size 0x0 pending 0x8000
vfio_migration_set_state (VFIO user </var/run/vfio-user.sock>) state 0
vfio_save_complete_precopy (VFIO user </var/run/vfio-user.sock>)
vfio_save_device_config_state (VFIO user </var/run/vfio-user.sock>)
vfio_region_unmap Region migration mmaps[0] unmap [0x1000 - 0x8fff]
vfio_save_cleanup (VFIO user </var/run/vfio-user.sock>)
vfio_migration_state_notifier (VFIO user </var/run/vfio-user.sock>) state completed
And in the destination QEMU log:
...
vfio_region_mmap Region migration mmaps[0] [0x1000 - 0x8fff]
vfio_migration_set_state (VFIO user </var/run/vfio-user.sock>) state 4
vfio_load_state (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100003
vfio_load_state (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100004
vfio_load_state_device_data (VFIO user </var/run/vfio-user.sock>) Offset 0x1000 size 0x8000
vfio_load_state (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100004
vfio_listener_region_del region_del 0xc0000 - 0xdffff
vfio_listener_region_add_ram region_add [ram] 0xc0000 - 0xcafff [0x7fa250200000]
vfio_listener_region_add_ram region_add [ram] 0xcb000 - 0xcdfff [0x7fa2506cb000]
vfio_listener_region_add_ram region_add [ram] 0xce000 - 0xdffff [0x7fa25020e000]
vfio_listener_region_add_skip SKIPPING region_add 0xb0000000 - 0xbfffffff
vfio_listener_region_del region_del 0xc0000 - 0xcafff
vfio_listener_region_del region_del 0xce000 - 0xdffff
vfio_listener_region_del region_del 0xe0000 - 0xfffff
vfio_listener_region_add_ram region_add [ram] 0xc0000 - 0xcafff [0x7fa2506c0000]
vfio_listener_region_add_ram region_add [ram] 0xce000 - 0xebfff [0x7fa2506ce000]
vfio_listener_region_add_ram region_add [ram] 0xec000 - 0xeffff [0x7fa2506ec000]
vfio_listener_region_add_ram region_add [ram] 0xf0000 - 0xfffff [0x7fa2506f0000]
vfio_listener_region_add_skip SKIPPING region_add 0xfed1c000 - 0xfed1ffff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd7000 - 0xfebd7fff
vfio_listener_region_add_ram region_add [ram] 0xfd000000 - 0xfdffffff [0x7fa241400000]
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4000 - 0xfebd43ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4400 - 0xfebd441f
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4420 - 0xfebd44ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4500 - 0xfebd4515
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4516 - 0xfebd45ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4600 - 0xfebd4607
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4608 - 0xfebd4fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe000000 - 0xfe000fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe001000 - 0xfe001fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe002000 - 0xfe002fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe003000 - 0xfe003fff
vfio_load_state (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100002
vfio_listener_region_add_skip SKIPPING region_add 0xfebd0000 - 0xfebd0fff
vfio_listener_region_add_ram region_add [ram] 0xfebd1000 - 0xfebd1fff [0x7fa35db96000]
vfio_listener_region_add_skip SKIPPING region_add 0xfebd0000 - 0xfebd0fff
vfio_listener_region_add_ram region_add [ram] 0xfebd1000 - 0xfebd1fff [0x7fa35db96000]
vfio_listener_region_add_skip SKIPPING region_add 0xfebd2000 - 0xfebd3fff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd5000 - 0xfebd53ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd5400 - 0xfebd5fff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd6000 - 0xfebd6fff
vfio_pci_write_config (VFIO user </var/run/vfio-user.sock>, @0x4, 0x507, len=0x2)
vfio_listener_region_add_skip SKIPPING region_add 0xfebd2000 - 0xfebd3fff
vfio_region_mmaps_set_enabled Region VFIO user </var/run/vfio-user.sock> BAR 0 mmaps enabled: 1
vfio_region_mmaps_set_enabled Region VFIO user </var/run/vfio-user.sock> BAR 4 mmaps enabled: 1
vfio_region_mmaps_set_enabled Region VFIO user </var/run/vfio-user.sock> BAR 5 mmaps enabled: 1
vfio_intx_disable (VFIO user </var/run/vfio-user.sock>)
vfio_msix_vector_do_use (VFIO user </var/run/vfio-user.sock>) vector 3 used