diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 4b9f4493c9f9d3008d46b64597d7c27d51a21197..7347075ca5b873a2192f57ad0647ee8fb15d8908 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -339,11 +339,12 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 	int ret;
 
 	if (dev_priv->chipset < 0x84) {
-		ret = RING_SPACE(chan, 3);
+		ret = RING_SPACE(chan, 4);
 		if (ret)
 			return ret;
 
-		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2);
+		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 3);
+		OUT_RING  (chan, NvSema);
 		OUT_RING  (chan, sema->mem->start);
 		OUT_RING  (chan, 1);
 	} else
@@ -351,10 +352,12 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
 		u64 offset = vma->offset + sema->mem->start;
 
-		ret = RING_SPACE(chan, 5);
+		ret = RING_SPACE(chan, 7);
 		if (ret)
 			return ret;
 
+		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
+		OUT_RING  (chan, chan->vram_handle);
 		BEGIN_RING(chan, NvSubSw, 0x0010, 4);
 		OUT_RING  (chan, upper_32_bits(offset));
 		OUT_RING  (chan, lower_32_bits(offset));
@@ -394,11 +397,12 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 	int ret;
 
 	if (dev_priv->chipset < 0x84) {
-		ret = RING_SPACE(chan, 4);
+		ret = RING_SPACE(chan, 5);
 		if (ret)
 			return ret;
 
-		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
+		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 2);
+		OUT_RING  (chan, NvSema);
 		OUT_RING  (chan, sema->mem->start);
 		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1);
 		OUT_RING  (chan, 1);
@@ -407,10 +411,12 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
 		u64 offset = vma->offset + sema->mem->start;
 
-		ret = RING_SPACE(chan, 5);
+		ret = RING_SPACE(chan, 7);
 		if (ret)
 			return ret;
 
+		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
+		OUT_RING  (chan, chan->vram_handle);
 		BEGIN_RING(chan, NvSubSw, 0x0010, 4);
 		OUT_RING  (chan, upper_32_bits(offset));
 		OUT_RING  (chan, lower_32_bits(offset));
@@ -504,22 +510,22 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
 	struct nouveau_gpuobj *obj = NULL;
 	int ret;
 
-	if (dev_priv->card_type >= NV_C0)
-		goto out_initialised;
+	if (dev_priv->card_type < NV_C0) {
+		/* Create an NV_SW object for various sync purposes */
+		ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
+		if (ret)
+			return ret;
 
-	/* Create an NV_SW object for various sync purposes */
-	ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
-	if (ret)
-		return ret;
+		ret = RING_SPACE(chan, 2);
+		if (ret)
+			return ret;
 
-	/* we leave subchannel empty for nvc0 */
-	ret = RING_SPACE(chan, 2);
-	if (ret)
-		return ret;
-	BEGIN_RING(chan, NvSubSw, 0, 1);
-	OUT_RING(chan, NvSw);
+		BEGIN_RING(chan, NvSubSw, 0, 1);
+		OUT_RING  (chan, NvSw);
+		FIRE_RING (chan);
+	}
 
-	/* Create a DMA object for the shared cross-channel sync area. */
+	/* Set up an area of memory shared between all channels for cross-channel sync */
 	if (USE_SEMA(dev) && dev_priv->chipset < 0x84) {
 		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;
 
@@ -534,23 +540,8 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
 		nouveau_gpuobj_ref(NULL, &obj);
 		if (ret)
 			return ret;
-
-		ret = RING_SPACE(chan, 2);
-		if (ret)
-			return ret;
-		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
-		OUT_RING(chan, NvSema);
-	} else {
-		ret = RING_SPACE(chan, 2);
-		if (ret)
-			return ret;
-		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
-		OUT_RING  (chan, chan->vram_handle); /* whole VM */
 	}
 
-	FIRE_RING(chan);
-
-out_initialised:
 	INIT_LIST_HEAD(&chan->fence.pending);
 	spin_lock_init(&chan->fence.lock);
 	atomic_set(&chan->fence.last_sequence_irq, 0);
diff --git a/drivers/gpu/drm/nouveau/nouveau_perf.c b/drivers/gpu/drm/nouveau/nouveau_perf.c
index 922fb6b664edda718e2e15f929500a2fe3967d21..ef9dec0e6f8b343c293eb7002560686838731180 100644
--- a/drivers/gpu/drm/nouveau/nouveau_perf.c
+++ b/drivers/gpu/drm/nouveau/nouveau_perf.c
@@ -182,6 +182,11 @@ nouveau_perf_init(struct drm_device *dev)
 		entries   = perf[2];
 	}
 
+	if (entries > NOUVEAU_PM_MAX_LEVEL) {
+		NV_DEBUG(dev, "perf table has too many entries - buggy vbios?\n");
+		entries = NOUVEAU_PM_MAX_LEVEL;
+	}
+
 	entry = perf + headerlen;
 	for (i = 0; i < entries; i++) {
 		struct nouveau_pm_level *perflvl = &pm->perflvl[pm->nr_perflvl];
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 80218887e0a004fc19337f180a93fe4ecb4196d2..144f79a350ae3d69b542a7f364988d194a28cbcb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -881,8 +881,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 
 #ifdef __BIG_ENDIAN
 	/* Put the card in BE mode if it's not */
-	if (nv_rd32(dev, NV03_PMC_BOOT_1))
-		nv_wr32(dev, NV03_PMC_BOOT_1, 0x00000001);
+	if (nv_rd32(dev, NV03_PMC_BOOT_1) != 0x01000001)
+		nv_wr32(dev, NV03_PMC_BOOT_1, 0x01000001);
 
 	DRM_MEMORYBARRIER();
 #endif
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 74a3f687270124cdfb32ddd55ce78f9ae70a6157..08da478ba544e312ab72b4b23b1db23607a433e5 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -409,7 +409,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	struct nouveau_channel *evo = dispc->sync;
 	int ret;
 
-	ret = RING_SPACE(evo, 24);
+	ret = RING_SPACE(evo, chan ? 25 : 27);
 	if (unlikely(ret))
 		return ret;
 
@@ -458,8 +458,19 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	/* queue the flip on the crtc's "display sync" channel */
 	BEGIN_RING(evo, 0, 0x0100, 1);
 	OUT_RING  (evo, 0xfffe0000);
-	BEGIN_RING(evo, 0, 0x0084, 5);
-	OUT_RING  (evo, chan ? 0x00000100 : 0x00000010);
+	if (chan) {
+		BEGIN_RING(evo, 0, 0x0084, 1);
+		OUT_RING  (evo, 0x00000100);
+	} else {
+		BEGIN_RING(evo, 0, 0x0084, 1);
+		OUT_RING  (evo, 0x00000010);
+		/* allows gamma somehow; PDISP will complain if you
+		 * don't wait for vblank before changing this.
+		 */
+		BEGIN_RING(evo, 0, 0x00e0, 1);
+		OUT_RING  (evo, 0x40000000);
+	}
+	BEGIN_RING(evo, 0, 0x0088, 4);
 	OUT_RING  (evo, dispc->sem.offset);
 	OUT_RING  (evo, 0xf00d0000 | dispc->sem.value);
 	OUT_RING  (evo, 0x74b1e000);