diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index dd0fbf76ac793d87580c1d750bad8983a25ef62e..dd2db025f7787e590d94bb229cb143559f9a8317 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -62,6 +62,7 @@ config ARCH_OMAP4
 	select PM_OPP if PM
 	select USB_ARCH_HAS_EHCI if USB_SUPPORT
 	select ARM_CPU_SUSPEND if PM
+	select ARCH_NEEDS_CPU_IDLE_COUPLED
 
 config SOC_OMAP5
 	bool "TI OMAP5"
diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index 02d15bbd4e35c2eb4d90cf892085d143d0334e81..ee05e193fc61e317b21b368c583f31ce71bbb76b 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -21,6 +21,7 @@
 #include "common.h"
 #include "pm.h"
 #include "prm.h"
+#include "clockdomain.h"
 
 /* Machine specific information */
 struct omap4_idle_statedata {
@@ -47,10 +48,14 @@ static struct omap4_idle_statedata omap4_idle_data[] = {
 	},
 };
 
-static struct powerdomain *mpu_pd, *cpu0_pd, *cpu1_pd;
+static struct powerdomain *mpu_pd, *cpu_pd[NR_CPUS];
+static struct clockdomain *cpu_clkdm[NR_CPUS];
+
+static atomic_t abort_barrier;
+static bool cpu_done[NR_CPUS];
 
 /**
- * omap4_enter_idle - Programs OMAP4 to enter the specified state
+ * omap4_enter_idle_coupled_[simple/coupled] - OMAP4 cpuidle entry functions
  * @dev: cpuidle device
  * @drv: cpuidle driver
  * @index: the index of state to be entered
@@ -59,60 +64,84 @@ static struct powerdomain *mpu_pd, *cpu0_pd, *cpu1_pd;
  * specified low power state selected by the governor.
  * Returns the amount of time spent in the low power state.
  */
-static int omap4_enter_idle(struct cpuidle_device *dev,
+static int omap4_enter_idle_simple(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
+			int index)
+{
+	local_fiq_disable();
+	omap_do_wfi();
+	local_fiq_enable();
+
+	return index;
+}
+
+static int omap4_enter_idle_coupled(struct cpuidle_device *dev,
 			struct cpuidle_driver *drv,
 			int index)
 {
 	struct omap4_idle_statedata *cx = &omap4_idle_data[index];
-	u32 cpu1_state;
 	int cpu_id = smp_processor_id();
 
 	local_fiq_disable();
 
 	/*
-	 * CPU0 has to stay ON (i.e in C1) until CPU1 is OFF state.
+	 * CPU0 has to wait and stay ON until CPU1 is OFF state.
 	 * This is necessary to honour hardware recommondation
 	 * of triggeing all the possible low power modes once CPU1 is
 	 * out of coherency and in OFF mode.
-	 * Update dev->last_state so that governor stats reflects right
-	 * data.
 	 */
-	cpu1_state = pwrdm_read_pwrst(cpu1_pd);
-	if (cpu1_state != PWRDM_POWER_OFF) {
-		index = drv->safe_state_index;
-		cx = &omap4_idle_data[index];
+	if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
+		while (pwrdm_read_pwrst(cpu_pd[1]) != PWRDM_POWER_OFF) {
+			cpu_relax();
+
+			/*
+			 * CPU1 could have already entered & exited idle
+			 * without hitting off because of a wakeup
+			 * or a failed attempt to hit off mode.  Check for
+			 * that here, otherwise we could spin forever
+			 * waiting for CPU1 off.
+			 */
+			if (cpu_done[1])
+			    goto fail;
+
+		}
 	}
 
-	if (index > 0)
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id);
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id);
 
 	/*
 	 * Call idle CPU PM enter notifier chain so that
 	 * VFP and per CPU interrupt context is saved.
 	 */
-	if (cx->cpu_state == PWRDM_POWER_OFF)
-		cpu_pm_enter();
-
-	pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state);
-	omap_set_pwrdm_state(mpu_pd, cx->mpu_state);
-
-	/*
-	 * Call idle CPU cluster PM enter notifier chain
-	 * to save GIC and wakeupgen context.
-	 */
-	if ((cx->mpu_state == PWRDM_POWER_RET) &&
-		(cx->mpu_logic_state == PWRDM_POWER_OFF))
-			cpu_cluster_pm_enter();
+	cpu_pm_enter();
+
+	if (dev->cpu == 0) {
+		pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state);
+		omap_set_pwrdm_state(mpu_pd, cx->mpu_state);
+
+		/*
+		 * Call idle CPU cluster PM enter notifier chain
+		 * to save GIC and wakeupgen context.
+		 */
+		if ((cx->mpu_state == PWRDM_POWER_RET) &&
+			(cx->mpu_logic_state == PWRDM_POWER_OFF))
+				cpu_cluster_pm_enter();
+	}
 
 	omap4_enter_lowpower(dev->cpu, cx->cpu_state);
+	cpu_done[dev->cpu] = true;
+
+	/* Wakeup CPU1 only if it is not offlined */
+	if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
+		clkdm_wakeup(cpu_clkdm[1]);
+		clkdm_allow_idle(cpu_clkdm[1]);
+	}
 
 	/*
 	 * Call idle CPU PM exit notifier chain to restore
-	 * VFP and per CPU IRQ context. Only CPU0 state is
-	 * considered since CPU1 is managed by CPU hotplug.
+	 * VFP and per CPU IRQ context.
 	 */
-	if (pwrdm_read_prev_pwrst(cpu0_pd) == PWRDM_POWER_OFF)
-		cpu_pm_exit();
+	cpu_pm_exit();
 
 	/*
 	 * Call idle CPU cluster PM exit notifier chain
@@ -121,8 +150,11 @@ static int omap4_enter_idle(struct cpuidle_device *dev,
 	if (omap4_mpuss_read_prev_context_state())
 		cpu_cluster_pm_exit();
 
-	if (index > 0)
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id);
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id);
+
+fail:
+	cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
+	cpu_done[dev->cpu] = false;
 
 	local_fiq_enable();
 
@@ -141,7 +173,7 @@ struct cpuidle_driver omap4_idle_driver = {
 			.exit_latency = 2 + 2,
 			.target_residency = 5,
 			.flags = CPUIDLE_FLAG_TIME_VALID,
-			.enter = omap4_enter_idle,
+			.enter = omap4_enter_idle_simple,
 			.name = "C1",
 			.desc = "MPUSS ON"
 		},
@@ -149,8 +181,8 @@ struct cpuidle_driver omap4_idle_driver = {
                         /* C2 - CPU0 OFF + CPU1 OFF + MPU CSWR */
 			.exit_latency = 328 + 440,
 			.target_residency = 960,
-			.flags = CPUIDLE_FLAG_TIME_VALID,
-			.enter = omap4_enter_idle,
+			.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_COUPLED,
+			.enter = omap4_enter_idle_coupled,
 			.name = "C2",
 			.desc = "MPUSS CSWR",
 		},
@@ -158,8 +190,8 @@ struct cpuidle_driver omap4_idle_driver = {
 			/* C3 - CPU0 OFF + CPU1 OFF + MPU OSWR */
 			.exit_latency = 460 + 518,
 			.target_residency = 1100,
-			.flags = CPUIDLE_FLAG_TIME_VALID,
-			.enter = omap4_enter_idle,
+			.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_COUPLED,
+			.enter = omap4_enter_idle_coupled,
 			.name = "C3",
 			.desc = "MPUSS OSWR",
 		},
@@ -168,6 +200,16 @@ struct cpuidle_driver omap4_idle_driver = {
 	.safe_state_index = 0,
 };
 
+/*
+ * For each cpu, setup the broadcast timer because local timers
+ * stops for the states above C1.
+ */
+static void omap_setup_broadcast_timer(void *arg)
+{
+	int cpu = smp_processor_id();
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
+}
+
 /**
  * omap4_idle_init - Init routine for OMAP4 idle
  *
@@ -180,19 +222,30 @@ int __init omap4_idle_init(void)
 	unsigned int cpu_id = 0;
 
 	mpu_pd = pwrdm_lookup("mpu_pwrdm");
-	cpu0_pd = pwrdm_lookup("cpu0_pwrdm");
-	cpu1_pd = pwrdm_lookup("cpu1_pwrdm");
-	if ((!mpu_pd) || (!cpu0_pd) || (!cpu1_pd))
+	cpu_pd[0] = pwrdm_lookup("cpu0_pwrdm");
+	cpu_pd[1] = pwrdm_lookup("cpu1_pwrdm");
+	if ((!mpu_pd) || (!cpu_pd[0]) || (!cpu_pd[1]))
 		return -ENODEV;
 
-	dev = &per_cpu(omap4_idle_dev, cpu_id);
-	dev->cpu = cpu_id;
+	cpu_clkdm[0] = clkdm_lookup("mpu0_clkdm");
+	cpu_clkdm[1] = clkdm_lookup("mpu1_clkdm");
+	if (!cpu_clkdm[0] || !cpu_clkdm[1])
+		return -ENODEV;
+
+	/* Configure the broadcast timer on each cpu */
+	on_each_cpu(omap_setup_broadcast_timer, NULL, 1);
+
+	for_each_cpu(cpu_id, cpu_online_mask) {
+		dev = &per_cpu(omap4_idle_dev, cpu_id);
+		dev->cpu = cpu_id;
+		dev->coupled_cpus = *cpu_online_mask;
 
-	cpuidle_register_driver(&omap4_idle_driver);
+		cpuidle_register_driver(&omap4_idle_driver);
 
-	if (cpuidle_register_device(dev)) {
-		pr_err("%s: CPUidle register device failed\n", __func__);
-		return -EIO;
+		if (cpuidle_register_device(dev)) {
+			pr_err("%s: CPUidle register failed\n", __func__);
+			return -EIO;
+		}
 	}
 
 	return 0;
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 13d20c8a283dd352515d659ec4c39956228a954a..2ff6d41ec6c6c004ace041b525ec1821d6389653 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -130,6 +130,7 @@ static struct clock_event_device clockevent_gpt = {
 	.name		= "gp_timer",
 	.features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
+	.rating		= 300,
 	.set_next_event	= omap2_gp_timer_set_next_event,
 	.set_mode	= omap2_gp_timer_set_mode,
 };
@@ -223,7 +224,8 @@ static void __init omap2_gp_clockevent_init(int gptimer_id,
 		clockevent_delta2ns(3, &clockevent_gpt);
 		/* Timer internal resynch latency. */
 
-	clockevent_gpt.cpumask = cpumask_of(0);
+	clockevent_gpt.cpumask = cpu_possible_mask;
+	clockevent_gpt.irq = omap_dm_timer_get_irq(&clkev);
 	clockevents_register_device(&clockevent_gpt);
 
 	pr_info("OMAP clockevent source: GPTIMER%d at %lu Hz\n",