Per-VMA RSS accounting: count resident pages in each vm_area_struct and print the per-VMA figure from fs/proc/task_mmu.c.

Signed-off-by: Robert "Sly Fox" Love <rml@novell.com>

 fs/proc/task_mmu.c |    5 +++--
 include/linux/mm.h |   22 ++++++++++++++++++++++
 mm/fremap.c        |    4 ++--
 mm/memory.c        |   11 ++++++-----
 mm/mmap.c          |   17 ++++++++++++++++-
 mm/rmap.c          |    4 ++--
 mm/swapfile.c      |    2 +-
 7 files changed, 52 insertions(+), 13 deletions(-)

diff -urN linux-2.6.8-20041108112901/fs/proc/task_mmu.c linux-rss/fs/proc/task_mmu.c
--- linux-2.6.8-20041108112901/fs/proc/task_mmu.c	2004-11-08 13:13:05.000000000 -0500
+++ linux-rss/fs/proc/task_mmu.c	2005-02-24 16:23:16.650812040 -0500
@@ -57,7 +57,7 @@
 		ino = inode->i_ino;
 	}
 
-	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %0lx %n",
 			map->vm_start,
 			map->vm_end,
 			flags & VM_READ ? 'r' : '-',
@@ -65,7 +65,8 @@
 			flags & VM_EXEC ? 'x' : '-',
 			flags & VM_MAYSHARE ? 's' : 'p',
 			map->vm_pgoff << PAGE_SHIFT,
-			MAJOR(dev), MINOR(dev), ino, &len);
+			MAJOR(dev), MINOR(dev), ino,
+			map->rss << (PAGE_SHIFT-10), &len);
 
 	if (map->vm_file) {
 		len = 25 + sizeof(void*) * 6 - len;
diff -urN linux-2.6.8-20041108112901/include/linux/mm.h linux-rss/include/linux/mm.h
--- linux-2.6.8-20041108112901/include/linux/mm.h	2004-11-08 13:13:16.000000000 -0500
+++ linux-rss/include/linux/mm.h	2005-02-24 15:12:01.485735920 -0500
@@ -68,6 +68,8 @@
 	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
 	unsigned long vm_flags;		/* Flags, listed below. */
 
+	unsigned long rss;		/* RSS pages */
+
 	struct rb_node vm_rb;
 
 	/*
@@ -797,6 +799,26 @@
 							-vma_pages(vma));
 }
 
+/*
+ * rss_inc - account for one more RSS page, both in the counter of the VMA
+ * passed in (vma->rss) and in the address-space-wide total (mm->rss).
+ */
+static inline void rss_inc(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	++vma->rss;
+	++mm->rss;
+}
+
+/*
+ * rss_dec - uncount one RSS page from this address space and this VMA.
+ * vma->rss is approximate (see split_vma()), so stop at 0, never wrap.
+ */
+static inline void rss_dec(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	mm->rss--;
+	vma->rss -= (vma->rss != 0);	/* saturating decrement */
+}
+
 #ifndef CONFIG_DEBUG_PAGEALLOC
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable)
diff -urN linux-2.6.8-20041108112901/mm/fremap.c linux-rss/mm/fremap.c
--- linux-2.6.8-20041108112901/mm/fremap.c	2004-11-08 13:13:06.000000000 -0500
+++ linux-rss/mm/fremap.c	2005-02-24 16:25:08.960738336 -0500
@@ -38,7 +38,7 @@
 					set_page_dirty(page);
 				page_remove_rmap(page);
 				page_cache_release(page);
-				mm->rss--;
+				rss_dec(mm, vma);
 			}
 		}
 	} else {
@@ -86,7 +86,7 @@
 
 	zap_pte(mm, vma, addr, pte);
 
-	mm->rss++;
+	rss_inc(mm, vma);
 	flush_icache_page(vma, page);
 	set_pte(pte, mk_pte(page, prot));
 	page_add_file_rmap(page);
diff -urN linux-2.6.8-20041108112901/mm/memory.c linux-rss/mm/memory.c
--- linux-2.6.8-20041108112901/mm/memory.c	2004-11-08 13:13:08.000000000 -0500
+++ linux-rss/mm/memory.c	2005-02-24 16:40:09.647813088 -0500
@@ -236,7 +236,7 @@
 		pmd_t * src_pmd, * dst_pmd;
 
 		src_pgd++; dst_pgd++;
-		
+
 		/* copy_pmd_range */
 		
 		if (pgd_none(*src_pgd))
@@ -325,6 +325,7 @@
 					pte = pte_mkclean(pte);
 				pte = pte_mkold(pte);
 				get_page(page);
+				/* XXX: per-VMA rss not updated here */
 				dst->rss++;
 				set_pte(dst_pte, pte);
 				page_dup_rmap(page);
@@ -1096,7 +1097,7 @@
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
 		if (PageReserved(old_page))
-			++mm->rss;
+			rss_inc(mm, vma);
 		else
 			page_remove_rmap(old_page);
 		break_cow(vma, new_page, address, page_table);
@@ -1378,7 +1379,7 @@
 	if (vm_swap_full())
 		remove_exclusive_swap_page(page);
 
-	mm->rss++;
+	rss_inc(mm, vma);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1443,7 +1444,7 @@
 			spin_unlock(&mm->page_table_lock);
 			goto out;
 		}
-		mm->rss++;
+		rss_inc(mm, vma);
 		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
 							 vma->vm_page_prot)),
 				      vma);
@@ -1552,7 +1553,7 @@
 	/* Only go through if we didn't race with anybody else... */
 	if (pte_none(*page_table)) {
 		if (!PageReserved(new_page))
-			++mm->rss;
+			rss_inc(mm, vma);
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
diff -urN linux-2.6.8-20041108112901/mm/mmap.c linux-rss/mm/mmap.c
--- linux-2.6.8-20041108112901/mm/mmap.c	2004-11-08 13:13:16.000000000 -0500
+++ linux-rss/mm/mmap.c	2005-02-24 16:41:17.760458392 -0500
@@ -1618,13 +1618,14 @@
 
 /*
  * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the the tail.
+ * either for the first part or the tail.
  */
 int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	      unsigned long addr, int new_below)
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	unsigned long rss;
 
 	if (mm->map_count >= sysctl_max_map_count)
 		return -ENOMEM;
@@ -1643,6 +1644,20 @@
 		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 	}
 
+	/*
+	 * Hack alert.  In splitting the VMA, we have two options wrt per-VMA
+	 * RSS accounting.  We can walk the page tables and reallocate the
+	 * RSS statistics back to each VMA on a per-page basis, or we can fudge
+	 * the stats, like below, and lose some per-VMA accuracy in the split
+	 * case but keep the aggregate number correct: 'new' takes the
+	 * rounded-down half and 'vma' keeps the rest.  'rss' holds the
+	 * pre-split count so a failed mpol_copy() can put it back.
+	 */
+	rss = vma->rss;
+	new->rss = rss / 2;
+	vma->rss = rss - new->rss;
+
 	pol = mpol_copy(vma_policy(vma));
 	if (IS_ERR(pol)) {
+		vma->rss = rss;
 		kmem_cache_free(vm_area_cachep, new);
diff -urN linux-2.6.8-20041108112901/mm/rmap.c linux-rss/mm/rmap.c
--- linux-2.6.8-20041108112901/mm/rmap.c	2004-11-08 13:13:15.000000000 -0500
+++ linux-rss/mm/rmap.c	2005-02-24 14:54:09.876645264 -0500
@@ -583,7 +583,7 @@
 		BUG_ON(pte_file(*pte));
 	}
 
-	mm->rss--;
+	rss_dec(mm, vma);
 	page_remove_rmap(page);
 	page_cache_release(page);
 
@@ -683,7 +683,7 @@
 
 		page_remove_rmap(page);
 		page_cache_release(page);
-		mm->rss--;
+		rss_dec(mm, vma);
 		(*mapcount)--;
 	}
 
diff -urN linux-2.6.8-20041108112901/mm/swapfile.c linux-rss/mm/swapfile.c
--- linux-2.6.8-20041108112901/mm/swapfile.c	2004-11-08 13:13:15.000000000 -0500
+++ linux-rss/mm/swapfile.c	2005-02-24 14:56:15.005622768 -0500
@@ -434,7 +434,7 @@
 unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
 	swp_entry_t entry, struct page *page)
 {
-	vma->vm_mm->rss++;
+	rss_inc(vma->vm_mm, vma);
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, address);