struct radeon_fence *fence)
 {
        /* Also consider EVENT_WRITE_EOP.  it handles the interrupts + timestamps + events */
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+       radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+       /* wait for 3D idle clean */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+       radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
 
        radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
        radeon_ring_write(rdev, dwords);
 
-       radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
-       radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
        /* SQ config */
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
        radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
        ring_size = num_loops * dwords_per_loop;
        /* set default  + shaders */
        ring_size += 40; /* shaders + def state */
-       ring_size += 7; /* fence emit for VB IB */
+       ring_size += 12; /* fence emit for VB IB */
        ring_size += 5; /* done copy */
-       ring_size += 7; /* fence emit for done copy */
+       ring_size += 12; /* fence emit for done copy */
        r = radeon_ring_lock(rdev, ring_size);
        if (r)
                return r;
 {
        int r;
 
-       radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
-       radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
-       /* wait for 3D idle clean */
-       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-       radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-       radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
-
        if (rdev->r600_blit.vb_ib)
                r600_vb_ib_put(rdev);