From: Martin Schwidefsky <schwidefsky@de.ibm.com>

Hi,
I'd like to propose a small change in the common memory management that
would enable s390 to get its dirty bits finally right. The change is a
architecture hook in SetPageUptodate.

The problem I want to solve: s390 does not keept its dirty bits in the
page table entries like other architectures do but in the "storage key".
The storage key is a 8 bit value associated with each physical(!) page.
It is accessed with some special instructions (iske, ivsk, sske, rrbe).
The storage key contains the access-control bits, the fetch-protection
bit, the referenced bit and the change bit (=dirty bit). Linux only
uses the referenced and the change/dirty bit.
This means that each physical page always has a dirty bit. On i386 a
page is implicitly clean if no page table entry points to it (and
PG_dirty is 0). This is not true on s390. A new page without any pte
pointing to it usually starts off dirty because nobody has reset the
dirty bit in the storage key. Worse, a write access due to i/o will
set the dirty bit! We need to clear the dirty bit somewhere or we'll
end up writing every page back to the disk before it becomes clean.
Up to now s390 uses a special bit in the pte that is set in mk_pte
for the first user of a page and makes set_pte to clear the storage
key. The problem is that this is a race condition if two processes
want to access the same page simultaneously. Then the page count is
already > 1 in mk_pte and nobody will clear the storage key. It
doesn't lead to any data loss because what happens is that a clean
page is considered dirty and is written back to the disk. The worst
scenario is a read only disk where this results in i/o errors (but no
data loss). 
Our solution is to move the clearing of the storage key (dirty bit)
from set_pte to SetPageUptodate. A patch that implements this is
attached. What do you think ?




 25-akpm/include/asm-s390/pgtable.h |   33 ++++++++-------------------------
 25-akpm/include/linux/page-flags.h |   11 ++++++++++-
 2 files changed, 18 insertions(+), 26 deletions(-)

diff -puN include/asm-s390/pgtable.h~s390-dirty-bit-cleaning include/asm-s390/pgtable.h
--- 25/include/asm-s390/pgtable.h~s390-dirty-bit-cleaning	Fri May 23 15:53:04 2003
+++ 25-akpm/include/asm-s390/pgtable.h	Fri May 23 15:53:04 2003
@@ -212,8 +212,7 @@ extern char empty_zero_page[PAGE_SIZE];
 #define _PAGE_INVALID   0x400          /* HW invalid                       */
 
 /* Software bits in the page table entry */
-#define _PAGE_MKCLEAN   0x002
-#define _PAGE_ISCLEAN   0x004
+#define _PAGE_ISCLEAN   0x002
 
 /* Mask and four different kinds of invalid pages. */
 #define _PAGE_INVALID_MASK	0x601
@@ -320,15 +319,6 @@ extern char empty_zero_page[PAGE_SIZE];
  */
 extern inline void set_pte(pte_t *pteptr, pte_t pteval)
 {
-	if ((pte_val(pteval) & (_PAGE_MKCLEAN|_PAGE_INVALID))
-	    == _PAGE_MKCLEAN) 
-	{
-		pte_val(pteval) &= ~_PAGE_MKCLEAN;
-               
-		asm volatile ("sske %0,%1" 
-				: : "d" (0), "a" (pte_val(pteval)));
-	}
-
 	*pteptr = pteval;
 }
 
@@ -501,7 +491,7 @@ extern inline pte_t pte_mkdirty(pte_t pt
 	 * sske instruction is slow. It is faster to let the
 	 * next instruction set the dirty bit.
 	 */
-	pte_val(pte) &= ~(_PAGE_MKCLEAN | _PAGE_ISCLEAN);
+	pte_val(pte) &= ~ _PAGE_ISCLEAN;
 	return pte;
 }
 
@@ -582,30 +572,23 @@ static inline pte_t mk_pte_phys(unsigned
 	pgprot_t __pgprot = (pgprot);					  \
 	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);  \
 	pte_t __pte = mk_pte_phys(__physpage, __pgprot);                  \
-	                                                                  \
-	if (!(pgprot_val(__pgprot) & _PAGE_ISCLEAN)) {			  \
-		int __users = !!PagePrivate(__page) + !!__page->mapping;  \
-		if (__users + page_count(__page) == 1)                    \
-			pte_val(__pte) |= _PAGE_MKCLEAN;                  \
-	}								  \
 	__pte;                                                            \
 })
 
 #define pfn_pte(pfn, pgprot)                                              \
 ({                                                                        \
-	struct page *__page = mem_map+(pfn);                              \
 	pgprot_t __pgprot = (pgprot);					  \
 	unsigned long __physpage = __pa((pfn) << PAGE_SHIFT);             \
 	pte_t __pte = mk_pte_phys(__physpage, __pgprot);                  \
-	                                                                  \
-	if (!(pgprot_val(__pgprot) & _PAGE_ISCLEAN)) {			  \
-		int __users = !!PagePrivate(__page) + !!__page->mapping;  \
-		if (__users + page_count(__page) == 1)                    \
-			pte_val(__pte) |= _PAGE_MKCLEAN;                  \
-	}								  \
 	__pte;                                                            \
 })
 
+#define arch_set_page_uptodate(__page)					  \
+	do {								  \
+		asm volatile ("sske %0,%1" : : "d" (0),			  \
+			      "a" (__pa((__page-mem_map) << PAGE_SHIFT)));\
+	} while (0)
+
 #ifdef __s390x__
 
 #define pfn_pmd(pfn, pgprot)                                              \
diff -puN include/linux/page-flags.h~s390-dirty-bit-cleaning include/linux/page-flags.h
--- 25/include/linux/page-flags.h~s390-dirty-bit-cleaning	Fri May 23 15:53:04 2003
+++ 25-akpm/include/linux/page-flags.h	Fri May 23 15:53:04 2003
@@ -7,6 +7,7 @@
 
 #include <linux/percpu.h>
 #include <linux/cache.h>
+#include <asm/pgtable.h>
 
 /*
  * Various page->flags bits:
@@ -158,8 +159,16 @@ extern void get_full_page_state(struct p
 #define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
 #define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
 
+#ifndef arch_set_page_uptodate
+#define arch_set_page_uptodate(page)
+#endif
+
 #define PageUptodate(page)	test_bit(PG_uptodate, &(page)->flags)
-#define SetPageUptodate(page)	set_bit(PG_uptodate, &(page)->flags)
+#define SetPageUptodate(page) \
+	do {								\
+		arch_set_page_uptodate(page);				\
+		set_bit(PG_uptodate, &(page)->flags);			\
+	} while (0)
 #define ClearPageUptodate(page)	clear_bit(PG_uptodate, &(page)->flags)
 
 #define PageDirty(page)		test_bit(PG_dirty, &(page)->flags)

_