diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 71727460968f9d25445c9f179d55e7856729fb5e..1a056b774eecb3d3af5525899198c313cd3adb78 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -356,6 +356,11 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
 	/* Wait until IDLE & CLEAN */
 	radeon_ring_write(rdev, PACKET0(0x1720, 0));
 	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
+	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(rdev, rdev->config.r100.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(rdev, rdev->config.r100.hdp_cntl);
 	/* Emit fence sequence & fire IRQ */
 	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
 	radeon_ring_write(rdev, fence->seq);
@@ -1713,14 +1718,6 @@ void r100_gpu_init(struct radeon_device *rdev)
 	r100_hdp_reset(rdev);
 }
 
-void r100_hdp_flush(struct radeon_device *rdev)
-{
-	u32 tmp;
-	tmp = RREG32(RADEON_HOST_PATH_CNTL);
-	tmp |= RADEON_HDP_READ_BUFFER_INVALIDATE;
-	WREG32(RADEON_HOST_PATH_CNTL, tmp);
-}
-
 void r100_hdp_reset(struct radeon_device *rdev)
 {
 	uint32_t tmp;
@@ -3313,6 +3310,7 @@ static int r100_startup(struct radeon_device *rdev)
 	}
 	/* Enable IRQ */
 	r100_irq_set(rdev);
+	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 3f2cc9e2e8d9a9f4a629361a2ea545c5f1b61f2c..b8623b734a27e6489419883eb92924f434432bca 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -36,7 +36,15 @@
 #include "rv350d.h"
 #include "r300_reg_safe.h"
 
-/* This files gather functions specifics to: r300,r350,rv350,rv370,rv380 */
+/* This files gather functions specifics to: r300,r350,rv350,rv370,rv380
+ *
+ * GPU Errata:
+ * - HOST_PATH_CNTL: r300 family seems to dislike write to HOST_PATH_CNTL
+ *   using MMIO to flush host path read cache, this lead to HARDLOCKUP.
+ *   However, scheduling such write to the ring seems harmless, i suspect
+ *   the CP read collide with the flush somehow, or maybe the MC, hard to
+ *   tell. (Jerome Glisse)
+ */
 
 /*
  * rv370,rv380 PCIE GART
@@ -178,6 +186,11 @@ void r300_fence_ring_emit(struct radeon_device *rdev,
 	/* Wait until IDLE & CLEAN */
 	radeon_ring_write(rdev, PACKET0(0x1720, 0));
 	radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
+	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(rdev, rdev->config.r300.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(rdev, rdev->config.r300.hdp_cntl);
 	/* Emit fence sequence & fire IRQ */
 	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
 	radeon_ring_write(rdev, fence->seq);
@@ -1258,6 +1271,7 @@ static int r300_startup(struct radeon_device *rdev)
 	}
 	/* Enable IRQ */
 	r100_irq_set(rdev);
+	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index f46502a253ec332302ab86dd9a7ec729223ca88f..1d4d16ed7db114ae70e7768cd9199642eee236a9 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -219,6 +219,7 @@ static int r420_startup(struct radeon_device *rdev)
 	r420_pipes_init(rdev);
 	/* Enable IRQ */
 	r100_irq_set(rdev);
+	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index 0f3843b6dac7669991d4d638c6f002d7204a9054..9a189072f2b93e09299ff63b1464d32e260ba657 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -186,6 +186,7 @@ static int r520_startup(struct radeon_device *rdev)
 	}
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
+	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 921926f3d1f3b6c8f9faf596c1dc92fb549aeda0..e2f43c184aa9def8fdce58235b685e8150bed9c0 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1388,11 +1388,6 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 	(void)RREG32(PCIE_PORT_DATA);
 }
 
-void r600_hdp_flush(struct radeon_device *rdev)
-{
-	WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-}
-
 /*
  * CP & Ring
  */
@@ -1789,6 +1784,8 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 	radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
 	radeon_ring_write(rdev, fence->seq);
+	radeon_ring_write(rdev, PACKET0(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	radeon_ring_write(rdev, 1);
 	/* CP_INTERRUPT packet 3 no longer exists, use packet 0 */
 	radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0));
 	radeon_ring_write(rdev, RB_INT_STAT);
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 9aecafb51b660947bfd2d2d7d93e92c3b3be1788..8787ea89dc6e0437acf493a48ae93d7e75fce8cc 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -577,9 +577,9 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
 	ring_size = num_loops * dwords_per_loop;
 	/* set default  + shaders */
 	ring_size += 40; /* shaders + def state */
-	ring_size += 5; /* fence emit for VB IB */
+	ring_size += 7; /* fence emit for VB IB */
 	ring_size += 5; /* done copy */
-	ring_size += 5; /* fence emit for done copy */
+	ring_size += 7; /* fence emit for done copy */
 	r = radeon_ring_lock(rdev, ring_size);
 	WARN_ON(r);
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index cee8bdc6c9ffb4a986ddaf6d0790dae38cd01991..eb5f99b9469debb3941cbc2b1aa91c35283cab9c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -656,7 +656,6 @@ struct radeon_asic {
 			       uint32_t offset, uint32_t obj_size);
 	int (*clear_surface_reg)(struct radeon_device *rdev, int reg);
 	void (*bandwidth_update)(struct radeon_device *rdev);
-	void (*hdp_flush)(struct radeon_device *rdev);
 	void (*hpd_init)(struct radeon_device *rdev);
 	void (*hpd_fini)(struct radeon_device *rdev);
 	bool (*hpd_sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -669,12 +668,14 @@ struct radeon_asic {
 struct r100_asic {
 	const unsigned	*reg_safe_bm;
 	unsigned	reg_safe_bm_size;
+	u32		hdp_cntl;
 };
 
 struct r300_asic {
 	const unsigned	*reg_safe_bm;
 	unsigned	reg_safe_bm_size;
 	u32		resync_scratch;
+	u32		hdp_cntl;
 };
 
 struct r600_asic {
@@ -1010,7 +1011,6 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s)))
 #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->clear_surface_reg((rdev), (r)))
 #define radeon_bandwidth_update(rdev) (rdev)->asic->bandwidth_update((rdev))
-#define radeon_hdp_flush(rdev) (rdev)->asic->hdp_flush((rdev))
 #define radeon_hpd_init(rdev) (rdev)->asic->hpd_init((rdev))
 #define radeon_hpd_fini(rdev) (rdev)->asic->hpd_fini((rdev))
 #define radeon_hpd_sense(rdev, hpd) (rdev)->asic->hpd_sense((rdev), (hpd))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index eb29217bbf1d51d90f7060c1610e0342b65bb2d9..f2fbd2e4e9df59479660f26811d04a92b8955b95 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -77,7 +77,6 @@ int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
 void r100_bandwidth_update(struct radeon_device *rdev);
 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r100_ring_test(struct radeon_device *rdev);
-void r100_hdp_flush(struct radeon_device *rdev);
 void r100_hpd_init(struct radeon_device *rdev);
 void r100_hpd_fini(struct radeon_device *rdev);
 bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -114,7 +113,6 @@ static struct radeon_asic r100_asic = {
 	.set_surface_reg = r100_set_surface_reg,
 	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &r100_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &r100_hpd_init,
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
@@ -174,7 +172,6 @@ static struct radeon_asic r300_asic = {
 	.set_surface_reg = r100_set_surface_reg,
 	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &r100_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &r100_hpd_init,
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
@@ -218,7 +215,6 @@ static struct radeon_asic r420_asic = {
 	.set_surface_reg = r100_set_surface_reg,
 	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &r100_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &r100_hpd_init,
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
@@ -267,7 +263,6 @@ static struct radeon_asic rs400_asic = {
 	.set_surface_reg = r100_set_surface_reg,
 	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &r100_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &r100_hpd_init,
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
@@ -324,7 +319,6 @@ static struct radeon_asic rs600_asic = {
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
 	.bandwidth_update = &rs600_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &rs600_hpd_init,
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
@@ -372,7 +366,6 @@ static struct radeon_asic rs690_asic = {
 	.set_surface_reg = r100_set_surface_reg,
 	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &rs690_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &rs600_hpd_init,
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
@@ -424,7 +417,6 @@ static struct radeon_asic rv515_asic = {
 	.set_surface_reg = r100_set_surface_reg,
 	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &rv515_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &rs600_hpd_init,
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
@@ -467,7 +459,6 @@ static struct radeon_asic r520_asic = {
 	.set_surface_reg = r100_set_surface_reg,
 	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &rv515_bandwidth_update,
-	.hdp_flush = &r100_hdp_flush,
 	.hpd_init = &rs600_hpd_init,
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
@@ -508,7 +499,6 @@ int r600_ring_test(struct radeon_device *rdev);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_pages, struct radeon_fence *fence);
-void r600_hdp_flush(struct radeon_device *rdev);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -544,7 +534,6 @@ static struct radeon_asic r600_asic = {
 	.set_surface_reg = r600_set_surface_reg,
 	.clear_surface_reg = r600_clear_surface_reg,
 	.bandwidth_update = &rv515_bandwidth_update,
-	.hdp_flush = &r600_hdp_flush,
 	.hpd_init = &r600_hpd_init,
 	.hpd_fini = &r600_hpd_fini,
 	.hpd_sense = &r600_hpd_sense,
@@ -589,7 +578,6 @@ static struct radeon_asic rv770_asic = {
 	.set_surface_reg = r600_set_surface_reg,
 	.clear_surface_reg = r600_clear_surface_reg,
 	.bandwidth_update = &rv515_bandwidth_update,
-	.hdp_flush = &r600_hdp_flush,
 	.hpd_init = &r600_hpd_init,
 	.hpd_fini = &r600_hpd_fini,
 	.hpd_sense = &r600_hpd_sense,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 60df2d7e7e4c9dd2572bee249978286ac109a549..0e1325e1853464d4e46552a2a6cfd523d406e4e6 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -131,7 +131,6 @@ int radeon_gem_set_domain(struct drm_gem_object *gobj,
 			printk(KERN_ERR "Failed to wait for object !\n");
 			return r;
 		}
-		radeon_hdp_flush(robj->rdev);
 	}
 	return 0;
 }
@@ -312,7 +311,6 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	mutex_lock(&dev->struct_mutex);
 	drm_gem_object_unreference(gobj);
 	mutex_unlock(&dev->struct_mutex);
-	radeon_hdp_flush(robj->rdev);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index bf7b3cf80ed70746ee8a2d1333c7a301e1ba4bd3..9f5418983e2a5f30af108287aaa18f19d6bc70b3 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -396,6 +396,7 @@ static int rs400_startup(struct radeon_device *rdev)
 		return r;
 	/* Enable IRQ */
 	r100_irq_set(rdev);
+	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 19258943a3706c2e5a18556e0a0f9eabbf1aedc9..a0378c57e4ec734aebeaac744cd00b3630ef6b58 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -554,6 +554,7 @@ static int rs600_startup(struct radeon_device *rdev)
 		return r;
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
+	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 42f27205a5978bd11d7c737b74d1d136d6b0962a..cd31da913771d898c3d68946f0f8f3399becf382 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -626,6 +626,7 @@ static int rs690_startup(struct radeon_device *rdev)
 		return r;
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
+	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 59632a506b46cc53ad6f74e8ca8f527662f6758c..62756717b0449bdbc3be45ec40f4cd2bc99e1058 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -479,6 +479,7 @@ static int rv515_startup(struct radeon_device *rdev)
 	}
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
+	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {