diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 3eadac5e171e489e157ffbb902609274a017ce17..31c5892f5acc49669c4326bf5162a42ebceb36ea 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -10,6 +10,7 @@
 #include <linux/config.h>
 #include <linux/version.h>
 #include <linux/errno.h>
+#include <linux/threads.h>
 #include <asm/thread_info.h>
 #include <asm/asi.h>
 #include <asm/pstate.h>
@@ -493,6 +494,35 @@ tlb_fixup_done:
 	call	prom_init
 	 mov	%l7, %o0			! OpenPROM cif handler
 
+	/* Initialize current_thread_info()->cpu as early as possible.
+	 * In order to do that accurately we have to patch up the get_cpuid()
+	 * assembler sequences.  And that, in turn, requires that we know
+	 * if we are on a Starfire box or not.  While we're here, patch up
+	 * the sun4v sequences as well.
+	 */
+	call	check_if_starfire
+	 nop
+	call	per_cpu_patch
+	 nop
+	call	sun4v_patch
+	 nop
+
+#ifdef CONFIG_SMP
+	call	hard_smp_processor_id		/* boot cpu id comes back in %o0 */
+	 nop
+	cmp	%o0, NR_CPUS
+	blu,pt	%xcc, 1f			/* id below NR_CPUS: skip the halt path */
+	 nop
+	call	boot_cpu_id_too_large		/* %o0 still holds the id (int cpu) */
+	 nop
+	/* Not reached... */
+
+1:
+#else
+	mov	0, %o0				/* non-SMP build: record cpu 0 */
+#endif
+	stb	%o0, [%g6 + TI_CPU]		/* current_thread_info()->cpu = id */
+
 	/* Off we go.... */
 	call	start_kernel
 	 nop
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 005167f82419b475cb7ae1e4ef109610ba61a5dd..9cf1c88cd774ffbcbe3b58da0574986c4e6860ff 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -220,7 +220,7 @@ char reboot_command[COMMAND_LINE_SIZE];
 
 static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
 
-static void __init per_cpu_patch(void)
+void __init per_cpu_patch(void)
 {
 	struct cpuid_patch_entry *p;
 	unsigned long ver;
@@ -280,7 +280,7 @@ static void __init per_cpu_patch(void)
 	}
 }
 
-static void __init sun4v_patch(void)
+void __init sun4v_patch(void)
 {
 	struct sun4v_1insn_patch_entry *p1;
 	struct sun4v_2insn_patch_entry *p2;
@@ -315,6 +315,15 @@ static void __init sun4v_patch(void)
 	}
 }
 
+#ifdef CONFIG_SMP
+void __init boot_cpu_id_too_large(int cpu)
+{
+	prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
+		    cpu, NR_CPUS);
+	prom_halt();		/* head.S expects no return from this call */
+}
+#endif /* CONFIG_SMP */
+
 void __init setup_arch(char **cmdline_p)
 {
 	/* Initialize PROM console and command line. */
@@ -332,16 +341,6 @@ void __init setup_arch(char **cmdline_p)
 	conswitchp = &prom_con;
 #endif
 
-	/* Work out if we are starfire early on */
-	check_if_starfire();
-
-	/* Now we know enough to patch the get_cpuid sequences
-	 * used by trap code.
-	 */
-	per_cpu_patch();
-
-	sun4v_patch();
-
 	boot_flags_init(*cmdline_p);
 
 	idprom_init();
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 90eaca3ec9a628c40ba90f27c2a89e825afdf736..4e8cd79156e0e0adc4a737265290f1e7cf496871 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1264,7 +1264,6 @@ void __init smp_tick_init(void)
 	boot_cpu_id = hard_smp_processor_id();
 	current_tick_offset = timer_tick_offset;
 
-	cpu_set(boot_cpu_id, cpu_online_map);
 	prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
 }
 
@@ -1345,18 +1344,6 @@ void __init smp_setup_cpu_possible_map(void)
 
 void __devinit smp_prepare_boot_cpu(void)
 {
-	int cpu = hard_smp_processor_id();
-
-	if (cpu >= NR_CPUS) {
-		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
-		prom_halt();
-	}
-
-	current_thread_info()->cpu = cpu;
-	__local_per_cpu_offset = __per_cpu_offset(cpu);
-
-	cpu_set(smp_processor_id(), cpu_online_map);
-	cpu_set(smp_processor_id(), phys_cpu_present_map);
 }
 
 int __devinit __cpu_up(unsigned int cpu)
@@ -1433,4 +1420,7 @@ void __init setup_per_cpu_areas(void)
 
 	for (i = 0; i < NR_CPUS; i++, ptr += size)
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+
+	/* Set up %g5 for the boot cpu.  */
+	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
 }
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 358e4d309ceb179776f2a7036dabce02cdfad1d2..c2059a3a06216cb936a163255501e20d464c6b21 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -159,17 +159,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define lazy_mmu_prot_update(pte)	do { } while (0)
 #endif
 
-#ifndef __HAVE_ARCH_MULTIPLE_ZERO_PAGE
+#ifndef __HAVE_ARCH_MOVE_PTE
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
-#else
-#define move_pte(pte, prot, old_addr, new_addr)				\
-({									\
- 	pte_t newpte = (pte);						\
-	if (pte_present(pte) && pfn_valid(pte_pfn(pte)) &&		\
-			pte_page(pte) == ZERO_PAGE(old_addr))		\
-		newpte = mk_pte(ZERO_PAGE(new_addr), (prot));		\
-	newpte;								\
-})
 #endif
 
 /*
diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h
index 174a3cda8c2605fe1b0c7c5c425bcea293eb948c..f80fe75c780063c28805cd4ee183a2f3c0ab7e95 100644
--- a/include/asm-mips/pgtable.h
+++ b/include/asm-mips/pgtable.h
@@ -70,7 +70,15 @@ extern unsigned long zero_page_mask;
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))
 
-#define __HAVE_ARCH_MULTIPLE_ZERO_PAGE
+#define __HAVE_ARCH_MOVE_PTE	/* suppress the asm-generic move_pte() */
+#define move_pte(pte, prot, old_addr, new_addr)				\
+({	/* Re-point a zero-page pte at ZERO_PAGE(new_addr). */		\
+ 	pte_t newpte = (pte);						\
+	if (pte_present(pte) && pfn_valid(pte_pfn(pte)) &&		\
+			pte_page(pte) == ZERO_PAGE(old_addr))		\
+		newpte = mk_pte(ZERO_PAGE(new_addr), (prot));		\
+	newpte;								\
+})
 
 extern void paging_init(void);
 
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index c44e7466534e9dab3f4d1cd3c5936cec1b45a80b..cd464f469a2c2b27fe5d157e684e30b2b4fbf051 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -689,6 +689,23 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p
 #define pte_clear(mm,addr,ptep)		\
 	set_pte_at((mm), (addr), (ptep), __pte(0UL))
 
+#ifdef DCACHE_ALIASING_POSSIBLE
+#define __HAVE_ARCH_MOVE_PTE	/* suppress the asm-generic move_pte() */
+#define move_pte(pte, prot, old_addr, new_addr)				\
+({	/* Yields pte as-is; may call flush_dcache_page_all(). */	\
+ 	pte_t newpte = (pte);						\
+	if (tlb_type != hypervisor && pte_present(pte)) {		\
+		unsigned long this_pfn = pte_pfn(pte);			\
+		/* Flush only when bit 13 differs between addresses. */	\
+		if (pfn_valid(this_pfn) &&				\
+		    (((old_addr) ^ (new_addr)) & (1 << 13)))		\
+			flush_dcache_page_all(current->mm,		\
+					      pfn_to_page(this_pfn));	\
+	}								\
+	newpte;								\
+})
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
 extern pgd_t swapper_pg_dir[2048];
 extern pmd_t swapper_low_pmd_dir[2048];