From: "Nakajima, Jun" <jun.nakajima@intel.com>

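Attached is a patch that makes the kernel idle loop use the PNI (Prescott
New Instructions) MONITOR/MWAIT instructions when the CPU supports them.
It also lets idle_setup() accept "halt" to select default_idle explicitly.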



 25-akpm/arch/i386/kernel/cpu/intel.c  |    4 ++-
 25-akpm/arch/i386/kernel/process.c    |   45 ++++++++++++++++++++++++++++++++++
 25-akpm/include/asm-i386/cpufeature.h |    2 +
 25-akpm/include/asm-i386/processor.h  |   16 ++++++++++++
 4 files changed, 66 insertions(+), 1 deletion(-)

diff -puN arch/i386/kernel/cpu/intel.c~idle-using-monitor-mwait arch/i386/kernel/cpu/intel.c
--- 25/arch/i386/kernel/cpu/intel.c~idle-using-monitor-mwait	Fri Sep  5 10:08:19 2003
+++ 25-akpm/arch/i386/kernel/cpu/intel.c	Fri Sep  5 10:08:19 2003
@@ -13,6 +13,8 @@
 
 extern int trap_init_f00f_bug(void);
 
+extern void select_idle_routine(const struct cpuinfo_x86 *c);
+
 #ifdef CONFIG_X86_INTEL_USERCOPY
 /*
  * Alignment at which movsl is preferred for bulk memory copies.
@@ -164,7 +166,7 @@ static void __init init_intel(struct cpu
 	}
 #endif
 
-
+	select_idle_routine(c);
 	if (c->cpuid_level > 1) {
 		/* supports eax=2  call */
 		int i, j, n;
diff -puN arch/i386/kernel/process.c~idle-using-monitor-mwait arch/i386/kernel/process.c
--- 25/arch/i386/kernel/process.c~idle-using-monitor-mwait	Fri Sep  5 10:08:19 2003
+++ 25-akpm/arch/i386/kernel/process.c	Fri Sep  5 10:08:19 2003
@@ -151,11 +151,56 @@ void cpu_idle (void)
 	}
 }
 
+/*
+ * Idle routine using the MONITOR/MWAIT instructions of P4 processors with
+ * PNI, which removes the need for an IPI to make an idle CPU re-check
+ * need_resched.  MONITOR is armed on current_thread_info()->flags (taken
+ * here to hold need_resched) and MWAIT then enters an optimized wait
+ * state; a change to need_resched wakes us from MWAIT (without an IPI).
+ */
+static void mwait_idle(void)
+{
+	local_irq_enable();
+
+	if (!need_resched()) {
+		set_thread_flag(TIF_POLLING_NRFLAG);
+		do {
+			__monitor((void *)&current_thread_info()->flags, 0, 0);
+			if (need_resched())	/* re-check after arming MONITOR */
+				break;
+			__mwait(0, 0);
+		} while (!need_resched());
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+	}
+}
+
+/*
+ * Called from init_intel() to choose pm_idle: mwait_idle when the CPU has
+ * MWAIT and nothing has set pm_idle yet; otherwise default_idle, without
+ * clobbering another explicit choice such as poll_idle from idle_setup().
+ */
+void __init select_idle_routine(const struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_MWAIT)) {
+		printk("Monitor/Mwait feature present.\n");
+		/* An override or an earlier non-MWAIT CPU's fallback wins. */
+		if (!pm_idle)
+			pm_idle = mwait_idle;
+		return;
+	}
+	/* Not all CPUs have MWAIT: drop mwait_idle, block later use of it. */
+	if (!pm_idle || pm_idle == mwait_idle)
+		pm_idle = default_idle;
+}
+
 static int __init idle_setup (char *str)
 {
 	if (!strncmp(str, "poll", 4)) {
 		printk("using polling idle threads.\n");
 		pm_idle = poll_idle;
+	} else if (!strncmp(str, "halt", 4)) {
+		printk("using halt in idle threads.\n");
+		pm_idle = default_idle;
 	}
 
 	return 1;
diff -puN include/asm-i386/cpufeature.h~idle-using-monitor-mwait include/asm-i386/cpufeature.h
--- 25/include/asm-i386/cpufeature.h~idle-using-monitor-mwait	Fri Sep  5 10:08:19 2003
+++ 25-akpm/include/asm-i386/cpufeature.h	Fri Sep  5 10:08:19 2003
@@ -71,6 +71,8 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
+
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
 #define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
diff -puN include/asm-i386/processor.h~idle-using-monitor-mwait include/asm-i386/processor.h
--- 25/include/asm-i386/processor.h~idle-using-monitor-mwait	Fri Sep  5 10:08:19 2003
+++ 25-akpm/include/asm-i386/processor.h	Fri Sep  5 10:10:25 2003
@@ -272,6 +272,22 @@ extern int MCA_bus;
 #define pc98 0
 #endif
 
+static inline void __monitor(const void *eax, unsigned long ecx,
+		unsigned long edx)
+{
+	/* "monitor %eax,%ecx,%edx;", emitted as raw opcode bytes */
+	asm volatile(
+		".byte 0x0f,0x01,0xc8;"
+		: :"a" (eax), "c" (ecx), "d"(edx));	/* inputs only */
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+	/* "mwait %eax,%ecx;", emitted as raw opcode bytes */
+	asm volatile(
+		".byte 0x0f,0x01,0xc9;"
+		: :"a" (eax), "c" (ecx));	/* inputs only */
+}
 
 /* from system description table in BIOS.  Mostly for MCA use, but
 others may find it useful. */

_