diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 04f8bf21557fc29bc78fbf6c9a2daf3ee1978297..5d04859a472db57001a3b2e24349bf02683796f7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -342,7 +342,8 @@ enum vmw_display_unit_type {
 };
 
 
-#define VMW_QUIRK_SCREENTARGET (1U << 0)
+#define VMW_QUIRK_DST_SID_OK (1U << 0)
+#define VMW_QUIRK_SRC_SID_OK (1U << 1)
 
 struct vmw_sw_context{
 	struct drm_open_hash res_ht;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 497ad6aecfbbc07735d238bece6edfe28e5e1639..0ec5fd6c71f48ba5cbc77a6e5289fbd32349a444 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -674,13 +674,16 @@ static int vmw_cmd_surface_copy_check(struct vmw_private *dev_priv,
 	int ret;
 
 	cmd = container_of(header, struct vmw_sid_cmd, header);
-	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
-				user_surface_converter,
-				&cmd->body.src.sid, NULL);
-	if (unlikely(ret != 0))
-		return ret;
 
-	if (sw_context->quirks & VMW_QUIRK_SCREENTARGET)
+	if (!(sw_context->quirks & VMW_QUIRK_SRC_SID_OK)) {
+		ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+					user_surface_converter,
+					&cmd->body.src.sid, NULL);
+		if (ret != 0)
+			return ret;
+	}
+
+	if (sw_context->quirks & VMW_QUIRK_DST_SID_OK)
 		return 0;
 
 	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
@@ -1264,7 +1267,7 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 	if (unlikely(suffix->maximumOffset > bo_size))
 		suffix->maximumOffset = bo_size;
 
-	if (sw_context->quirks & VMW_QUIRK_SCREENTARGET)
+	if (sw_context->quirks & VMW_QUIRK_DST_SID_OK)
 		goto out_no_surface;
 
 	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
@@ -1505,6 +1508,9 @@ static int vmw_cmd_update_gb_image(struct vmw_private *dev_priv,
 
 	cmd = container_of(header, struct vmw_gb_surface_cmd, header);
 
+	if (sw_context->quirks & VMW_QUIRK_SRC_SID_OK)
+		return 0;
+
 	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 				 user_surface_converter,
 				 &cmd->body.image.sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 6680aa67386f337c7e144198e391bd2061b953b0..615ff6cfc4f9c8118dac44da79175ec1ae2baa5f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -487,7 +487,8 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 					   struct vmw_surface *surface,
 					   struct vmw_framebuffer **out,
 					   const struct drm_mode_fb_cmd
-					   *mode_cmd)
+					   *mode_cmd,
+					   bool is_dmabuf_proxy)
 
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -562,6 +563,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	vfbs->surface = surface;
 	vfbs->base.user_handle = mode_cmd->handle;
 	vfbs->master = drm_master_get(file_priv->master);
+	vfbs->is_dmabuf_proxy = is_dmabuf_proxy;
 
 	mutex_lock(&vmaster->fb_surf_mutex);
 	list_add_tail(&vfbs->head, &vmaster->fb_surf);
@@ -699,6 +701,82 @@ static int vmw_framebuffer_dmabuf_unpin(struct vmw_framebuffer *vfb)
 	return vmw_dmabuf_unpin(dev_priv, vfbd->buffer, false);
 }
 
+/**
+ * vmw_create_dmabuf_proxy - create a proxy surface for the DMA buf
+ *
+ * @dev: DRM device
+ * @mode_cmd: parameters for the new surface
+ * @dmabuf_mob: MOB backing the DMA buf
+ * @srf_out: newly created surface
+ *
+ * When the content FB is a DMA buf, we create a surface as a proxy to the
+ * same buffer.  This way we can do a surface copy rather than a surface DMA.
+ * This is a more efficient approach
+ *
+ * RETURNS:
+ * 0 on success, error code otherwise
+ */
+static int vmw_create_dmabuf_proxy(struct drm_device *dev,
+				   struct drm_mode_fb_cmd *mode_cmd,
+				   struct vmw_dma_buffer *dmabuf_mob,
+				   struct vmw_surface **srf_out)
+{
+	uint32_t format;
+	struct drm_vmw_size content_base_size;
+	int ret;
+
+
+	switch (mode_cmd->depth) {
+	case 32:
+	case 24:
+		format = SVGA3D_X8R8G8B8;
+		break;
+
+	case 16:
+	case 15:
+		format = SVGA3D_R5G6B5;
+		break;
+
+	case 8:
+		format = SVGA3D_P8;
+		break;
+
+	default:
+		DRM_ERROR("Invalid framebuffer format %d\n", mode_cmd->depth);
+		return -EINVAL;
+	}
+
+	content_base_size.width  = mode_cmd->width;
+	content_base_size.height = mode_cmd->height;
+	content_base_size.depth  = 1;
+
+	ret = vmw_surface_gb_priv_define(dev,
+			0, /* kernel visible only */
+			0, /* flags */
+			format,
+			true, /* can be a scanout buffer */
+			1, /* num of mip levels */
+			0,
+			content_base_size,
+			srf_out);
+	if (ret) {
+		DRM_ERROR("Failed to allocate proxy content buffer\n");
+		return ret;
+	}
+
+	/* Use the same MOB backing for surface */
+	vmw_dmabuf_reference(dmabuf_mob);
+
+	(*srf_out)->res.backup = dmabuf_mob;
+
+	/* FIXME:  Waiting for fbdev rework to do a proper reserve/pin */
+	ret = vmw_resource_validate(&(*srf_out)->res);
+
+	return ret;
+}
+
+
+
 static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 					  struct vmw_dma_buffer *dmabuf,
 					  struct vmw_framebuffer **out,
@@ -801,6 +879,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	struct vmw_dma_buffer *bo = NULL;
 	struct ttm_base_object *user_obj;
 	struct drm_mode_fb_cmd mode_cmd;
+	bool is_dmabuf_proxy = false;
 	int ret;
 
 	mode_cmd.width = mode_cmd2->width;
@@ -849,13 +928,29 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	if (ret)
 		goto err_out;
 
-	/* Create the new framebuffer depending one what we got back */
-	if (bo)
+	/*
+	 * We cannot use the SurfaceDMA command in an non-accelerated VM,
+	 * therefore, wrap the DMA buf in a surface so we can use the
+	 * SurfaceCopy command.
+	 */
+	if (bo && !(dev_priv->capabilities & SVGA_CAP_3D) &&
+	    dev_priv->active_display_unit == vmw_du_screen_target) {
+		ret = vmw_create_dmabuf_proxy(dev_priv->dev, &mode_cmd, bo,
+			&surface);
+		if (ret)
+			goto err_out;
+
+		is_dmabuf_proxy = true;
+	}
+
+	/* Create the new framebuffer depending one what we have */
+	if (surface)
+		ret = vmw_kms_new_framebuffer_surface(dev_priv, file_priv,
+						      surface, &vfb, &mode_cmd,
+						      is_dmabuf_proxy);
+	else if (bo)
 		ret = vmw_kms_new_framebuffer_dmabuf(dev_priv, bo, &vfb,
 						     &mode_cmd);
-	else if (surface)
-		ret = vmw_kms_new_framebuffer_surface(dev_priv, file_priv,
-						      surface, &vfb, &mode_cmd);
 	else
 		BUG();
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 548fa872b39cf1685f49a78a2116e988d5cc5423..db8ae94c403c6f2e4987037fc05689df6f8dfc18 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -72,6 +72,7 @@ struct vmw_framebuffer_surface {
 	struct vmw_dma_buffer *buffer;
 	struct list_head head;
 	struct drm_master *master;
+	bool is_dmabuf_proxy;  /* true if this is proxy surface for DMA buf */
 };
 
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
index 0feac5675c514f7237778b01255b30906e224352..e0fc2485ddb16ab1da37b7dfab1261524c06dd5e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
@@ -31,8 +31,7 @@
  * If we set up the screen target otable, screen objects stop working.
  */
 
-#define VMW_OTABLE_SETUP_SUB ((VMWGFX_ENABLE_SCREEN_TARGET_OTABLE &&	\
-			       (dev_priv->capabilities & SVGA_CAP_3D)) ? 0 : 1)
+#define VMW_OTABLE_SETUP_SUB ((VMWGFX_ENABLE_SCREEN_TARGET_OTABLE ? 0 : 1))
 
 #ifdef CONFIG_64BIT
 #define VMW_PPN_SIZE 8
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 6738c1ebf09ad1cf2b5de6cbd78e32ce089c7d6d..9dcbe8ba08ea9b6e26d001e24f8a7f1a0a57a818 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -497,7 +497,7 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv,
 
 	ret = vmw_dmabuf_init(dev_priv, &user_bo->dma, size,
 			      (dev_priv->has_mob) ?
-			      &vmw_sys_placement :
+			      &vmw_mob_placement :
 			      &vmw_vram_sys_placement, true,
 			      &vmw_user_dmabuf_destroy);
 	if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 3b8235c7ee42baf1fd56b8dff40f8119fad9cc45..ef99df7463f3cffa038331999d4b510e4d68c7ed 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -141,6 +141,63 @@ static void vmw_stdu_crtc_destroy(struct drm_crtc *crtc)
 
 
 
+/**
+ * vmw_stdu_dma_update - Update DMA buf dirty region on the SVGA device
+ *
+ * @dev_priv:  VMW DRM device
+ * @file_priv: Pointer to a drm file private structure
+ * @vfbs: VMW framebuffer surface that may need a DMA buf update
+ * @x: top/left corner of the content area to blit from
+ * @y: top/left corner of the content area to blit from
+ * @width: width of the blit area
+ * @height: height of the blit area
+ *
+ * The SVGA device may have the DMA buf cached, so before letting the
+ * device use it as the source image for a subsequent operation, we
+ * update the cached copy.
+ *
+ * RETURNs:
+ * 0 on success, error code on failure
+ */
+static int vmw_stdu_dma_update(struct vmw_private *dev_priv,
+			       struct drm_file *file_priv,
+			       struct vmw_framebuffer_surface *vfbs,
+			       uint32_t x, uint32_t y,
+			       uint32_t width, uint32_t height)
+{
+	size_t fifo_size;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdUpdateGBImage body;
+	} img_update_cmd;
+
+
+	/* Only need to do this if the surface is a DMA buf proxy */
+	if (!vfbs->is_dmabuf_proxy)
+		return 0;
+
+	fifo_size = sizeof(img_update_cmd);
+
+	memset(&img_update_cmd, 0, fifo_size);
+	img_update_cmd.header.id   = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+	img_update_cmd.header.size = sizeof(img_update_cmd.body);
+
+	img_update_cmd.body.image.sid = vfbs->surface->res.id;
+
+	img_update_cmd.body.box.x = x;
+	img_update_cmd.body.box.y = y;
+	img_update_cmd.body.box.w = width;
+	img_update_cmd.body.box.h = height;
+	img_update_cmd.body.box.d = 1;
+
+	return vmw_execbuf_process(file_priv, dev_priv, NULL,
+				   (void *) &img_update_cmd,
+				   fifo_size, 0, VMW_QUIRK_SRC_SID_OK,
+				   NULL, NULL);
+}
+
+
+
 /**
  * vmw_stdu_content_copy - copies an area from the content to display surface
  *
@@ -166,11 +223,13 @@ static int vmw_stdu_content_copy(struct vmw_private *dev_priv,
 				 uint32_t width, uint32_t height,
 				 uint32_t display_x, uint32_t display_y)
 {
-	size_t fifo_size;
+	struct vmw_framebuffer_surface *content_vfbs;
+	size_t fifo_size;	
 	int ret;
 	void *cmd;
+	u32 quirks = VMW_QUIRK_DST_SID_OK;
 
-	struct vmw_surface_dma {
+	struct {
 		SVGA3dCmdHeader     header;
 		SVGA3dCmdSurfaceDMA body;
 		SVGA3dCopyBox       area;
@@ -193,24 +252,43 @@ static int vmw_stdu_content_copy(struct vmw_private *dev_priv,
 		return -EINVAL;
 	}
 
+
 	if (stdu->content_fb_type == SEPARATE_DMA) {
 		struct vmw_framebuffer *content_vfb;
-		struct vmw_framebuffer_dmabuf *content_vfbd;
-		struct vmw_framebuffer_surface *content_vfbs;
 		struct drm_vmw_size cur_size = {0};
 		const struct svga3d_surface_desc *desc;
+		enum SVGA3dSurfaceFormat format;
 		SVGA3dCmdSurfaceDMASuffix *suffix;
 		SVGAGuestPtr ptr;
 
+
 		content_vfb  = vmw_framebuffer_to_vfb(stdu->content_fb);
-		content_vfbd = vmw_framebuffer_to_vfbd(stdu->content_fb);
-		content_vfbs = vmw_framebuffer_to_vfbs(stdu->content_fb);
 
 		cur_size.width  = width;
 		cur_size.height = height;
 		cur_size.depth  = 1;
 
-		desc = svga3dsurface_get_desc(content_vfbs->surface->format);
+		/* Derive a SVGA3dSurfaceFormat for the DMA buf */
+		switch (content_vfb->base.bits_per_pixel) {
+		case 32:
+			format = SVGA3D_A8R8G8B8;
+			break;
+		case 24:
+			format = SVGA3D_X8R8G8B8;
+			break;
+		case 16:
+			format = SVGA3D_R5G6B5;
+			break;
+		case 15:
+			format = SVGA3D_A1R5G5B5;
+			break;
+		default:
+			DRM_ERROR("Invalid color depth: %d\n",
+					content_vfb->base.depth);
+			return -EINVAL;
+		}
+
+		desc = svga3dsurface_get_desc(format);
 
 
 		fifo_size = sizeof(surface_dma_cmd);
@@ -250,19 +328,40 @@ static int vmw_stdu_content_copy(struct vmw_private *dev_priv,
 
 		cmd = (void *) &surface_dma_cmd;
 	} else {
-		struct vmw_framebuffer *content_vfb;
+		u32 src_id;
+
+
+		content_vfbs = vmw_framebuffer_to_vfbs(stdu->content_fb);
+
+		if (content_vfbs->is_dmabuf_proxy) {
+			ret = vmw_stdu_dma_update(dev_priv, file_priv,
+						  content_vfbs,
+						  content_x, content_y,
+						  width, height);
+
+			if (ret != 0) {
+				DRM_ERROR("Failed to update cached DMA buf\n");
+				return ret;
+			}
 
-		content_vfb = vmw_framebuffer_to_vfb(stdu->content_fb);
+			quirks |= VMW_QUIRK_SRC_SID_OK;
+			src_id = content_vfbs->surface->res.id;
+		} else {
+			struct vmw_framebuffer *content_vfb;
 
+			content_vfb = vmw_framebuffer_to_vfb(stdu->content_fb);
+			src_id = content_vfb->user_handle;
+		}
+ 
 		fifo_size = sizeof(surface_cpy_cmd);
 
-		memset(&surface_cpy_cmd, 0, sizeof(surface_cpy_cmd));
+		memset(&surface_cpy_cmd, 0, fifo_size);
 
 		surface_cpy_cmd.header.id   = SVGA_3D_CMD_SURFACE_COPY;
 		surface_cpy_cmd.header.size = sizeof(surface_cpy_cmd.body) +
 					      sizeof(surface_cpy_cmd.area);
 
-		surface_cpy_cmd.body.src.sid  = content_vfb->user_handle;
+		surface_cpy_cmd.body.src.sid  = src_id;
 		surface_cpy_cmd.body.dest.sid = stdu->display_srf->res.id;
 
 		surface_cpy_cmd.area.srcx = content_x;
@@ -276,8 +375,11 @@ static int vmw_stdu_content_copy(struct vmw_private *dev_priv,
 		cmd = (void *) &surface_cpy_cmd;
 	}
 
-	ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
-				  fifo_size, 0, VMW_QUIRK_SCREENTARGET,
+
+
+	ret = vmw_execbuf_process(file_priv, dev_priv, NULL,
+				  (void *) cmd,
+				  fifo_size, 0, quirks,
 				  NULL, NULL);
 
 	return ret;
@@ -391,7 +493,8 @@ static int vmw_stdu_bind_st(struct vmw_private *dev_priv,
  * vmw_stdu_update_st - Updates a Screen Target
  *
  * @dev_priv: VMW DRM device
- * @file_priv: Pointer to a drm file private structure
+ * @file_priv: Pointer to DRM file private structure.  Set to NULL when
+ *             we want to blank display.
  * @stdu: display unit affected
  * @update_area: area that needs to be updated
  *
@@ -412,6 +515,7 @@ static int vmw_stdu_update_st(struct vmw_private *dev_priv,
 	u32 width, height;
 	u32 display_update_x, display_update_y;
 	unsigned short display_x1, display_y1, display_x2, display_y2;
+	int ret;
 
 	struct {
 		SVGA3dCmdHeader header;
@@ -444,8 +548,11 @@ static int vmw_stdu_update_st(struct vmw_private *dev_priv,
 	height = min(update_area->y2, display_y2) -
 		 max(update_area->y1, display_y1);
 
+	/*
+	 * If content is on a separate surface, then copy the dirty area to
+	 * the display surface
+	 */
 	if (file_priv && stdu->content_fb_type != SAME_AS_DISPLAY) {
-		int ret;
 
 		ret = vmw_stdu_content_copy(dev_priv, file_priv,
 					    stdu,
@@ -459,6 +566,29 @@ static int vmw_stdu_update_st(struct vmw_private *dev_priv,
 		}
 	}
 
+
+	/*
+	 * If the display surface is the same as the content surface, then
+	 * it may be backed by a DMA buf.  If it is then we need to update
+	 * the device's cached copy of the DMA buf before issuing the screen
+	 * target update.
+	 */
+	if (file_priv && stdu->content_fb_type == SAME_AS_DISPLAY) {
+		struct vmw_framebuffer_surface *vfbs;
+
+		vfbs = vmw_framebuffer_to_vfbs(stdu->content_fb);
+		ret = vmw_stdu_dma_update(dev_priv, file_priv,
+					  vfbs,
+					  max(update_area->x1, display_x1),
+					  max(update_area->y1, display_y1),
+					  width, height);
+
+		if (ret != 0) {
+			DRM_ERROR("Failed to update cached DMA buffer\n");
+			return ret;
+		}
+	}
+
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 
 	if (unlikely(cmd == NULL)) {
@@ -1066,8 +1196,7 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv)
 	if (!VMWGFX_ENABLE_SCREEN_TARGET_OTABLE)
 		return -ENOSYS;
 
-	if (!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS) ||
-	    !(dev_priv->capabilities & SVGA_CAP_3D))
+	if (!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS))
 		return -ENOSYS;
 
 	ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
@@ -1333,7 +1462,7 @@ int vmw_kms_stdu_present(struct vmw_private *dev_priv,
 		cmd->body.dest.sid = stdu[cur_du]->display_srf->res.id;
 
 		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
-					  fifo_size, 0, VMW_QUIRK_SCREENTARGET,
+					  fifo_size, 0, VMW_QUIRK_DST_SID_OK,
 					  NULL, NULL);
 
 		if (unlikely(ret != 0))