From a752471c9fc1e58bed02e400620695499efdd383 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost <ehabkost@redhat.com> Date: Thu, 3 Sep 2009 16:22:15 -0300 Subject: [PATCH 1/2] Fix VM state change handlers running out of order Message-id: <87my6ovbt0.fsf@pike.pond.sub.org> RH-Author: Markus Armbruster <armbru@redhat.com> Patchwork-id: 3102 O-Subject: [PATCH RHEL-5] Fix VM state change handlers running out of order Bugzilla: 514522 CVE: RH-Acked-by: Juan Quintela <quintela@redhat.com> RH-Acked-by: Kevin Wolf <kwolf@redhat.com> RH-Acked-by: Luiz Capitulino <lcapitulino@redhat.com> When a VM state change handler changes VM state, other VM state change handlers can see the state transitions out of order. bmdma_map(), scsi_disk_init() and virtio_blk_init() install VM state change handlers to restart DMA. These handlers can vm_stop() by running into a write error on a drive with werror=stop. This throws the VM state change handler callback into disarray. Here's an example case I observed: 0. The virtual IDE drive goes south. All future writes return errors. 1. Something encounters a write error, and duly stops the VM with vm_stop(). 2. vm_stop() calls vm_state_notify(0). 3. vm_state_notify() runs the callbacks in list vm_change_state_head. It contains ide_dma_restart_cb() installed by bmdma_map(). It also contains audio_vm_change_state_handler() installed by audio_init(). 4. audio_vm_change_state_handler() stops audio stuff. 5. User continues VM with monitor command "c". This runs vm_start(). 6. vm_start() calls vm_state_notify(1). 7. vm_state_notify() runs the callbacks in vm_change_state_head. 8. ide_dma_restart_cb() happens to come first. It does its work, runs into a write error, and duly stops the VM with vm_stop(). 9. vm_stop() runs vm_state_notify(0). 10. vm_state_notify() runs the callbacks in vm_change_state_head. 11. audio_vm_change_state_handler() stops audio stuff. Which isn't running. 12. vm_stop() finishes, ide_dma_restart_cb() finishes, step 7's vm_state_notify() resumes running handlers. 13. audio_vm_change_state_handler() starts audio stuff. Oopsie. The reported bug is similar, only with qxl_vm_change_state_handler() instead of audio_vm_change_state_handler(). There, the double-stop in step 11 fails an assertion in libspice. Fix this by moving the actual write from each VM state change handler into a new bottom half (suggested by Gleb Natapov). Trivial backport of the patch proposed upstream. Technical issues should probably be discussed there. If this fix is deemed too risky at this stage, we can leave the bug unfixed, or try to paper over it by just removing the assertion in libspice. The VM state change handlers continue to run out of order then (stop, stop, start instead of stop, start, stop). Exact impact not known at this time, only so much: * Audio audio_vm_change_state_handler() starts / stops sound "voices", whatever that is. I figure stopping twice is harmless. However, we end up with voices started instead of stopped. Impact? * IDE, SCSI, virtio-blk {ide,scsi,virtion_blk}_dma_restart_cb() do nothing on stop. So they should be fine. * QXL qxl_vm_change_state_handler() starts / stops the worker, qxl_vga.timer, and qxl_pipe_read(). It raises an interrupt on start, which shouldn't matter. The double-stop triggers the assertion in libspice. If we remove that, we end up with the worker, timer and pipe running instead of stopped. Impact? * VDI vdi_port_vm_change_state_handler() updates PCIVDIPortDevice member running. It does more on start, but that doesn't matter. We end up with PCIVDIPortDevice member running true instead of false. Impact? http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1907974 /mnt/brewroot/scratch/armbru/task_1907974/ I'd like Yaniv Kaul to verify this fixes the bug for him. Bug 511038. Please ACK or NACK. Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- qemu/hw/ide.c | 22 +++++++++++++++++++--- qemu/hw/scsi-disk.c | 21 ++++++++++++++++++--- qemu/hw/virtio-blk.c | 20 +++++++++++++++++--- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/qemu/hw/ide.c b/qemu/hw/ide.c index 1e6b2b4..a4d78be 100644 --- a/qemu/hw/ide.c +++ b/qemu/hw/ide.c @@ -496,6 +496,7 @@ typedef struct BMDMAState { BlockDriverAIOCB *aiocb; int64_t sector_num; uint32_t nsector; + QEMUBH *bh; } BMDMAState; typedef struct PCIIDEState { @@ -1060,11 +1061,13 @@ static void ide_sector_write(IDEState *s) } } -static void ide_dma_restart_cb(void *opaque, int running) +static void ide_dma_restart_bh(void *opaque) { BMDMAState *bm = opaque; - if (!running) - return; + + qemu_bh_delete(bm->bh); + bm->bh = NULL; + if (bm->status & BM_STATUS_DMA_RETRY) { bm->status &= ~BM_STATUS_DMA_RETRY; ide_dma_restart(bm->ide_if); @@ -1074,6 +1077,19 @@ static void ide_dma_restart_cb(void *opaque, int running) } } +static void ide_dma_restart_cb(void *opaque, int running) +{ + BMDMAState *bm = opaque; + + if (!running) + return; + + if (!bm->bh) { + bm->bh = qemu_bh_new(ide_dma_restart_bh, bm); + qemu_bh_schedule(bm->bh); + } +} + static void ide_write_dma_cb(void *opaque, int ret) { BMDMAState *bm = opaque; diff --git a/qemu/hw/scsi-disk.c b/qemu/hw/scsi-disk.c index e4fc1bb..5991e12 100644 --- a/qemu/hw/scsi-disk.c +++ b/qemu/hw/scsi-disk.c @@ -76,6 +76,7 @@ struct SCSIDeviceState scsi_completionfn completion; void *opaque; char drive_serial_str[21]; + QEMUBH *bh; }; /* Global pool of SCSIRequest structures. */ @@ -312,12 +313,13 @@ static int scsi_write_data(SCSIDevice *d, uint32_t tag) return 0; } -static void scsi_dma_restart_cb(void *opaque, int running, int reason) +static void scsi_dma_restart_bh(void *opaque) { SCSIDeviceState *s = opaque; SCSIRequest *r = s->requests; - if (!running) - return; + + qemu_bh_delete(s->bh); + s->bh = NULL; while (r) { if (r->status & SCSI_REQ_STATUS_RETRY) { @@ -328,6 +330,19 @@ static void scsi_dma_restart_cb(void *opaque, int running, int reason) } } +static void scsi_dma_restart_cb(void *opaque, int running) +{ + SCSIDeviceState *s = opaque; + + if (!running) + return; + + if (!s->bh) { + s->bh = qemu_bh_new(scsi_dma_restart_bh, s); + qemu_bh_schedule(s->bh); + } +} + /* Return a pointer to the data buffer. */ static uint8_t *scsi_get_buf(SCSIDevice *d, uint32_t tag) { diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c index e4060d4..58d5e37 100644 --- a/qemu/hw/virtio-blk.c +++ b/qemu/hw/virtio-blk.c @@ -23,6 +23,7 @@ typedef struct VirtIOBlock BlockDriverState *bs; VirtQueue *vq; void *rq; + QEMUBH *bh; } VirtIOBlock; static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev) @@ -219,13 +220,13 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) */ } -static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason) +static void virtio_blk_dma_restart_bh(void *opaque) { VirtIOBlock *s = opaque; VirtIOBlockReq *req = s->rq; - if (!running) - return; + qemu_bh_delete(s->bh); + s->bh = NULL; s->rq = NULL; @@ -235,6 +236,19 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason) } } +static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason) +{ + VirtIOBlock *s = opaque; + + if (!running) + return; + + if (!s->bh) { + s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s); + qemu_bh_schedule(s->bh); + } +} + static void virtio_blk_reset(VirtIODevice *vdev) { /* -- 1.6.3.rc4.29.g8146