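Add per-VMA RSS and anonymous-page statistics

Each VMA gains 'rss' and 'anon_rss' page counters.  'rss' is updated
together with mm->rss through new rss_inc()/rss_dec() helpers, and the
mapping listing in fs/proc/task_mmu.c prints both counters, in kilobytes
(hex), as two new columns.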

Signed-off-by: Robert "Black Ninja" Love <rml@novell.com>

 fs/proc/task_mmu.c |    6 ++++--
 include/linux/mm.h |   23 +++++++++++++++++++++++
 mm/fremap.c        |    4 ++--
 mm/memory.c        |   12 +++++++-----
 mm/mmap.c          |   26 +++++++++++++++++++++++++-
 mm/rmap.c          |    7 +++++--
 mm/swapfile.c      |    2 +-
 7 files changed, 67 insertions(+), 13 deletions(-)

diff -urN linux-2.6.8-20041108112901/fs/proc/task_mmu.c linux-rss/fs/proc/task_mmu.c
--- linux-2.6.8-20041108112901/fs/proc/task_mmu.c	2004-11-08 13:13:05.000000000 -0500
+++ linux-rss/fs/proc/task_mmu.c	2005-02-25 11:43:37.335859152 -0500
@@ -57,7 +57,7 @@
 		ino = inode->i_ino;
 	}
 
-	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %08lx %08lx %n",
 			map->vm_start,
 			map->vm_end,
 			flags & VM_READ ? 'r' : '-',
@@ -65,7 +65,9 @@
 			flags & VM_EXEC ? 'x' : '-',
 			flags & VM_MAYSHARE ? 's' : 'p',
 			map->vm_pgoff << PAGE_SHIFT,
-			MAJOR(dev), MINOR(dev), ino, &len);
+			MAJOR(dev), MINOR(dev), ino,
+			map->rss << (PAGE_SHIFT-10),
+			map->anon_rss << (PAGE_SHIFT-10), &len);
 
 	if (map->vm_file) {
 		len = 25 + sizeof(void*) * 6 - len;
diff -urN linux-2.6.8-20041108112901/include/linux/mm.h linux-rss/include/linux/mm.h
--- linux-2.6.8-20041108112901/include/linux/mm.h	2004-11-08 13:13:16.000000000 -0500
+++ linux-rss/include/linux/mm.h	2005-02-25 11:45:33.159251320 -0500
@@ -68,6 +68,9 @@
 	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
 	unsigned long vm_flags;		/* Flags, listed below. */
 
+	unsigned long rss;		/* RSS pages */
+	unsigned long anon_rss;		/* anonymous (COW) pages */
+
 	struct rb_node vm_rb;
 
 	/*
@@ -797,6 +800,26 @@
 							-vma_pages(vma));
 }
 
+/*
+ * rss_inc - count one more resident page in both mm->rss and vma->rss.
+ * Plain ++, no locking: callers presumably hold page_table_lock (confirm).
+ */
+static inline void rss_inc(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	mm->rss++;
+	vma->rss++;
+}
+
+/*
+ * rss_dec - drop one resident page from both mm->rss and vma->rss.
+ * Plain --, no locking: callers presumably hold page_table_lock (confirm).
+ */
+static inline void rss_dec(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	mm->rss--;
+	vma->rss--;	/* NOTE(review): unsigned; can wrap after split_vma()? */
+}
+
 #ifndef CONFIG_DEBUG_PAGEALLOC
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable)
diff -urN linux-2.6.8-20041108112901/mm/fremap.c linux-rss/mm/fremap.c
--- linux-2.6.8-20041108112901/mm/fremap.c	2004-11-08 13:13:06.000000000 -0500
+++ linux-rss/mm/fremap.c	2005-02-24 16:25:08.960738336 -0500
@@ -38,7 +38,7 @@
 					set_page_dirty(page);
 				page_remove_rmap(page);
 				page_cache_release(page);
-				mm->rss--;
+				rss_dec(mm, vma);
 			}
 		}
 	} else {
@@ -86,7 +86,7 @@
 
 	zap_pte(mm, vma, addr, pte);
 
-	mm->rss++;
+	rss_inc(mm, vma);
 	flush_icache_page(vma, page);
 	set_pte(pte, mk_pte(page, prot));
 	page_add_file_rmap(page);
diff -urN linux-2.6.8-20041108112901/mm/memory.c linux-rss/mm/memory.c
--- linux-2.6.8-20041108112901/mm/memory.c	2004-11-08 13:13:08.000000000 -0500
+++ linux-rss/mm/memory.c	2005-02-25 11:45:37.159643168 -0500
@@ -236,7 +236,7 @@
 		pmd_t * src_pmd, * dst_pmd;
 
 		src_pgd++; dst_pgd++;
-		
+
 		/* copy_pmd_range */
 		
 		if (pgd_none(*src_pgd))
@@ -1095,8 +1095,10 @@
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
+		if (PageAnon(old_page))
+			vma->anon_rss--;
 		if (PageReserved(old_page))
-			++mm->rss;
+			rss_inc(mm, vma);
 		else
 			page_remove_rmap(old_page);
 		break_cow(vma, new_page, address, page_table);
@@ -1378,7 +1380,7 @@
 	if (vm_swap_full())
 		remove_exclusive_swap_page(page);
 
-	mm->rss++;
+	rss_inc(mm, vma);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1443,7 +1445,7 @@
 			spin_unlock(&mm->page_table_lock);
 			goto out;
 		}
-		mm->rss++;
+		rss_inc(mm, vma);
 		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
 							 vma->vm_page_prot)),
 				      vma);
@@ -1552,7 +1554,7 @@
 	/* Only go through if we didn't race with anybody else... */
 	if (pte_none(*page_table)) {
 		if (!PageReserved(new_page))
-			++mm->rss;
+			rss_inc(mm, vma);
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
diff -urN linux-2.6.8-20041108112901/mm/mmap.c linux-rss/mm/mmap.c
--- linux-2.6.8-20041108112901/mm/mmap.c	2004-11-08 13:13:16.000000000 -0500
+++ linux-rss/mm/mmap.c	2005-02-25 11:41:52.403811248 -0500
@@ -1618,13 +1618,14 @@
 
 /*
  * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the the tail.
+ * either for the first part or the tail.
  */
 int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	      unsigned long addr, int new_below)
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	unsigned long rss, anon;
 
 	if (mm->map_count >= sysctl_max_map_count)
 		return -ENOMEM;
@@ -1643,6 +1644,29 @@
 		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 	}
 
+	/*
+	 * Hack alert.  When splitting a VMA we can either walk the page tables
+	 * and re-attribute RSS to each half page by page, or fudge the stats
+	 * as below: lose some per-VMA accuracy but keep the aggregate correct.
+	 * NOTE(review): this runs before the mpol_copy() failure check below,
+	 * which frees 'new'; vma's counts would then stay halved -- confirm.
+	 */
+	rss = vma->rss;
+	if (rss) {
+		vma->rss = new->rss = rss / 2;
+		if (rss % 2 == 1)
+			vma->rss++;
+	}
+
+	/* Same deal for per-VMA Anon statistics */
+	anon = vma->anon_rss;
+	if (anon) {
+		vma->anon_rss = new->anon_rss = anon / 2;
+		if (anon % 2 == 1)
+			vma->anon_rss++;
+	}
+
+
 	pol = mpol_copy(vma_policy(vma));
 	if (IS_ERR(pol)) {
 		kmem_cache_free(vm_area_cachep, new);
diff -urN linux-2.6.8-20041108112901/mm/rmap.c linux-rss/mm/rmap.c
--- linux-2.6.8-20041108112901/mm/rmap.c	2004-11-08 13:13:15.000000000 -0500
+++ linux-rss/mm/rmap.c	2005-02-25 11:32:50.315221240 -0500
@@ -434,6 +434,8 @@
 	BUG_ON(PageReserved(page));
 	BUG_ON(!anon_vma);
 
+	vma->anon_rss++;
+
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	index = (address - vma->vm_start) >> PAGE_SHIFT;
 	index += vma->vm_pgoff;
@@ -581,9 +583,10 @@
 		swap_duplicate(entry);
 		set_pte(pte, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*pte));
+		vma->anon_rss--;	/* pte now a swap entry */
 	}
 
-	mm->rss--;
+	rss_dec(mm, vma);
 	page_remove_rmap(page);
 	page_cache_release(page);
 
@@ -683,7 +686,7 @@
 
 		page_remove_rmap(page);
 		page_cache_release(page);
-		mm->rss--;
+		rss_dec(mm, vma);
 		(*mapcount)--;
 	}
 
diff -urN linux-2.6.8-20041108112901/mm/swapfile.c linux-rss/mm/swapfile.c
--- linux-2.6.8-20041108112901/mm/swapfile.c	2004-11-08 13:13:15.000000000 -0500
+++ linux-rss/mm/swapfile.c	2005-02-24 14:56:15.005622768 -0500
@@ -434,7 +434,7 @@
 unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
 	swp_entry_t entry, struct page *page)
 {
-	vma->vm_mm->rss++;
+	rss_inc(vma->vm_mm, vma);
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, address);
