Skip to content

vfio-pci - Intel Arc DG2 - host errors

Related to #1597 (closed)

Log attached.

Host environment

  • Operating system: Arch Linux
  • OS/kernel version: 6.2.10-arch1-1
  • Architecture: x86_64
  • QEMU flavor: qemu-system-x86_64
  • QEMU version: 7.2.1
  • QEMU command line:
     2023-04-08 13:04:12.505+0000: starting up libvirt version: 9.2.0, qemu version: 7.2.1, kernel: 6.2.9-arch1-1, hostname: xana
     LC_ALL=C \
     PATH=/usr/local/sbin:/usr/local/bin:/usr/bin \
     HOME=/var/lib/libvirt/qemu/domain-1-wintesting \
     XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-1-wintesting/.local/share \
     XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-1-wintesting/.cache \
     XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-1-wintesting/.config \
     TZ=US/Eastern \
     /usr/bin/qemu-system-x86_64 \
     -name guest=wintesting,debug-threads=on \
     -S \
     -object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-1-wintesting/master-key.aes"}' \
     -blockdev '{"driver":"file","filename":"/usr/share/edk2-ovmf/x64/OVMF_CODE.fd","node-name":"libvirt-pflash0-storage","auto-read-only":true,"discard":"unmap"}' \
     -blockdev '{"node-name":"libvirt-pflash0-format","read-only":true,"driver":"raw","file":"libvirt-pflash0-storage"}' \
     -blockdev '{"driver":"file","filename":"/var/lib/libvirt/qemu/nvram/wintesting_VARS.fd","node-name":"libvirt-pflash1-storage","auto-read-only":true,"discard":"unmap"}' \
     -blockdev '{"node-name":"libvirt-pflash1-format","read-only":false,"driver":"raw","file":"libvirt-pflash1-storage"}' \
     -machine pc-q35-7.0,usb=off,vmport=off,dump-guest-core=off,memory-backend=pc.ram,pflash0=libvirt-pflash0-format,pflash1=libvirt-pflash1-format \
     -accel kvm \
     -cpu host,migratable=on,topoext=on,hv-relaxed=on,hv-vapic=on,hv-spinlocks=0x1fff,kvm=off \
     -m 28672 \
     -object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":30064771072}' \
     -overcommit mem-lock=off \
     -smp 24,sockets=1,dies=1,cores=12,threads=2 \
     -uuid 99170a1c-1c81-4aeb-bdd7-6acbf3187f8a \
     -smbios 'type=0,vendor=American Megatrends International,, LLC.,version=P2.10' \
     -smbios 'type=1,manufacturer=ASRock,product=B550 Taichi' \
     -smbios 'type=2,manufacturer=Canary Systems,, LLC,product=RIG,version=1.0,serial=001' \
     -no-user-config \
     -nodefaults \
     -chardev socket,id=charmonitor,fd=32,server=on,wait=off \
     -mon chardev=charmonitor,id=monitor,mode=control \
     -rtc base=localtime \
     -no-shutdown \
     -global ICH9-LPC.disable_s3=1 \
     -global ICH9-LPC.disable_s4=1 \
     -boot menu=on,strict=on \
     -device '{"driver":"pcie-root-port","port":16,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x2"}' \
     -device '{"driver":"pcie-root-port","port":17,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x2.0x1"}' \
     -device '{"driver":"pcie-root-port","port":18,"chassis":3,"id":"pci.3","bus":"pcie.0","addr":"0x2.0x2"}' \
     -device '{"driver":"pcie-root-port","port":19,"chassis":4,"id":"pci.4","bus":"pcie.0","addr":"0x2.0x3"}' \
     -device '{"driver":"pcie-root-port","port":20,"chassis":5,"id":"pci.5","bus":"pcie.0","addr":"0x2.0x4"}' \
     -device '{"driver":"pcie-root-port","port":21,"chassis":6,"id":"pci.6","bus":"pcie.0","addr":"0x2.0x5"}' \
     -device '{"driver":"pcie-root-port","port":22,"chassis":7,"id":"pci.7","bus":"pcie.0","addr":"0x2.0x6"}' \
     -device '{"driver":"pcie-root-port","port":23,"chassis":8,"id":"pci.8","bus":"pcie.0","addr":"0x2.0x7"}' \
     -device '{"driver":"pcie-root-port","port":24,"chassis":9,"id":"pci.9","bus":"pcie.0","multifunction":true,"addr":"0x3"}' \
     -device '{"driver":"pcie-root-port","port":25,"chassis":10,"id":"pci.10","bus":"pcie.0","addr":"0x3.0x1"}' \
     -device '{"driver":"pcie-root-port","port":26,"chassis":11,"id":"pci.11","bus":"pcie.0","addr":"0x3.0x2"}' \
     -device '{"driver":"pcie-root-port","port":27,"chassis":12,"id":"pci.12","bus":"pcie.0","addr":"0x3.0x3"}' \
     -device '{"driver":"pcie-root-port","port":28,"chassis":13,"id":"pci.13","bus":"pcie.0","addr":"0x3.0x4"}' \
     -device '{"driver":"pcie-root-port","port":29,"chassis":14,"id":"pci.14","bus":"pcie.0","addr":"0x3.0x5"}' \
     -device '{"driver":"qemu-xhci","p2":15,"p3":15,"id":"usb","bus":"pci.2","addr":"0x0"}' \
     -device '{"driver":"virtio-scsi-pci","id":"scsi0","bus":"pci.3","addr":"0x0"}' \
     -blockdev '{"driver":"host_device","filename":"/dev/disk/by-id/nvme-WD_Blue_SN570_2TB_21523V801428-part1","aio":"native","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
     -blockdev '{"node-name":"libvirt-1-format","read-only":false,"discard":"unmap","cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-1-storage"}' \
     -device '{"driver":"scsi-hd","bus":"scsi0.0","channel":0,"scsi-id":0,"lun":0,"device_id":"drive-scsi0-0-0-0","drive":"libvirt-1-format","id":"scsi0-0-0-0","bootindex":1,"write-cache":"on"}' \
     -chardev pty,id=charserial0 \
     -device '{"driver":"isa-serial","chardev":"charserial0","id":"serial0","index":0}' \
     -audiodev '{"id":"audio1","driver":"spice"}' \
     -spice port=5900,addr=0.0.0.0,disable-ticketing=on,image-compression=off,seamless-migration=on \
     -device '{"driver":"virtio-vga","id":"video0","max_outputs":1,"bus":"pcie.0","addr":"0x1"}' \
     -device '{"driver":"ich9-intel-hda","id":"sound0","bus":"pcie.0","addr":"0x1b"}' \
     -device '{"driver":"hda-duplex","id":"sound0-codec0","bus":"sound0.0","cad":0,"audiodev":"audio1"}' \
     -global ICH9-LPC.noreboot=off \
     -watchdog-action reset \
     -chardev spicevmc,id=charredir0,name=usbredir \
     -device '{"driver":"usb-redir","chardev":"charredir0","id":"redir0","bus":"usb.0","port":"2"}' \
     -chardev spicevmc,id=charredir1,name=usbredir \
     -device '{"driver":"usb-redir","chardev":"charredir1","id":"redir1","bus":"usb.0","port":"3"}' \
     -device '{"driver":"vfio-pci","host":"0000:18:00.0","id":"hostdev0","bus":"pci.1","addr":"0x0"}' \
     -device '{"driver":"vfio-pci","host":"0000:19:00.0","id":"hostdev1","bus":"pci.5","addr":"0x0"}' \
     -device '{"driver":"vfio-pci","host":"0000:1b:00.3","id":"hostdev2","bus":"pci.6","addr":"0x0"}' \
     -device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.4","addr":"0x0"}' \
     -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
     -msg timestamp=on
     char device redirected to /dev/pts/1 (label charserial0)
     2023-04-08 13:10:13.724+0000: shutting down, reason=shutdown

Emulated/Virtualized environment

  • Operating system: Windows 10
  • Architecture: x86_64

Emulated/Virtualized environment

  • Operating system: Ubuntu 22.04
  • Architecture: x86_64

Description of problem

The host continues to respond (slowly) after the VM is shut down. It speeds back up to normal after about an hour. However, a reboot is required to get the host to operate normally.

When the VM is shut down, the host starts to display the following messages in dmesg:

[Thu Apr 13 01:30:47 2023] vfio-pci 0000:18:00.0: not ready 1023ms after FLR; waiting
[Thu Apr 13 01:30:49 2023] vfio-pci 0000:18:00.0: not ready 2047ms after FLR; waiting
[Thu Apr 13 01:30:52 2023] vfio-pci 0000:18:00.0: not ready 4095ms after FLR; waiting
[Thu Apr 13 01:30:57 2023] vfio-pci 0000:18:00.0: not ready 8191ms after FLR; waiting
[Thu Apr 13 01:31:06 2023] vfio-pci 0000:18:00.0: not ready 16383ms after FLR; waiting
[Thu Apr 13 01:31:25 2023] vfio-pci 0000:18:00.0: not ready 32767ms after FLR; waiting
[Thu Apr 13 01:31:59 2023] vfio-pci 0000:18:00.0: not ready 65535ms after FLR; giving up
[Thu Apr 13 01:32:11 2023] vfio-pci 0000:18:00.0: not ready 1023ms after bus reset; waiting
[Thu Apr 13 01:32:13 2023] vfio-pci 0000:18:00.0: not ready 2047ms after bus reset; waiting
[Thu Apr 13 01:32:16 2023] vfio-pci 0000:18:00.0: not ready 4095ms after bus reset; waiting
[Thu Apr 13 01:32:21 2023] vfio-pci 0000:18:00.0: not ready 8191ms after bus reset; waiting
[Thu Apr 13 01:32:31 2023] vfio-pci 0000:18:00.0: not ready 16383ms after bus reset; waiting
[Thu Apr 13 01:32:48 2023] vfio-pci 0000:18:00.0: not ready 32767ms after bus reset; waiting
[Thu Apr 13 01:33:22 2023] vfio-pci 0000:18:00.0: not ready 65535ms after bus reset; giving up

Steps to reproduce

  1. Shut down the VM.

Additional information

I have startup and shutdown scripts that detach and reattach the card, and these scripts work fine if I test them alone. It's only when I shut down the VM that the issue presents itself.

revert.sh

#!/bin/bash
# revert.sh - shutdown-side script: unloads the VFIO modules, reattaches the
# GPU's video and audio functions to the host, and restarts the display manager.
#
# Expects VIRSH_GPU_VIDEO and VIRSH_GPU_AUDIO to be set and a `cpuSchedutil`
# command to exist -- presumably all provided by the two files sourced below
# (TODO confirm; their contents are not shown here).

# Trace every command as it runs, to make the hook's progress visible in logs.
set -x

systemctl reboot # to workaround host lockup on shutdown
# NOTE(review): `systemctl reboot` normally only queues the reboot and returns,
# so the commands below may still begin executing before the host goes down --
# confirm that is acceptable while this workaround is in place.

# Load the config file with our environmental variables
source "/etc/libvirt/hooks/kvm.conf"
source "/etc/libvirt/hooks/vmPreBootSetup"

# Presumably restores the "schedutil" CPU frequency governor (name-based guess;
# defined outside this script).
cpuSchedutil

# Unload VFIO-PCI Kernel Driver
# (vfio_pci first, then the modules it sits on top of)
modprobe -r vfio_pci
modprobe -r vfio_iommu_type1
modprobe -r vfio

# Re-Bind GPU to our display drivers
# Expansions are quoted so each device name is always passed to virsh as
# exactly one argument (no word-splitting or globbing).
virsh nodedev-reattach "$VIRSH_GPU_VIDEO"
virsh nodedev-reattach "$VIRSH_GPU_AUDIO"

#modprobe drm_buddy intel_gtt video drm_display_helper cec ttm i915

# Restart Display Manager
systemctl restart sddm.service

Full dmesg log: vfio_13_april_2023.txt

To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information