From: Nikita Danilov <Nikita@Namesys.COM>

The vmscan logic at present will scan the inactive list with increasing
priority until a threshold is triggered.  At that threshold we start
unmapping pages from pagetables.

The problem is that each time someone calls into this code, the priority is
initially low, so some mapped pages will be refiled even though we really
should be unmapping them now.

Nikita's patch adds the `pressure' field to struct zone.  It is a decaying
average of the zone's memory pressure and allows us to start unmapping pages
immediately on entry to page reclaim, based on measurements which were made
in earlier reclaim attempts.



 include/linux/mmzone.h |   12 ++++++++++++
 mm/vmscan.c            |   33 +++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

diff -puN include/linux/mmzone.h~zone-pressure include/linux/mmzone.h
--- 25/include/linux/mmzone.h~zone-pressure	2003-07-27 00:13:07.000000000 -0700
+++ 25-akpm/include/linux/mmzone.h	2003-07-27 00:13:07.000000000 -0700
@@ -89,6 +89,18 @@ struct zone {
 
 	ZONE_PADDING(_pad2_)
 
+ 	/*
+	 * measure of scanning intensity for this zone. It is calculated
+	 * as exponentially decaying average of the scanning priority
+	 * required to free enough pages in this zone
+	 * (zone_adj_pressure()).
+	 *
+	 *     0                    --- low pressure
+	 *
+	 *     (DEF_PRIORITY << 10) --- high pressure
+	 */
+	int pressure;
+
 	/*
 	 * free areas of different sizes
 	 */
diff -puN mm/vmscan.c~zone-pressure mm/vmscan.c
--- 25/mm/vmscan.c~zone-pressure	2003-07-27 00:13:07.000000000 -0700
+++ 25-akpm/mm/vmscan.c	2003-07-27 00:13:07.000000000 -0700
@@ -80,6 +80,20 @@ static long total_memory;
 #endif
 
 /*
+ * exponentially decaying average
+ */
+static inline int expavg(int avg, int val)
+{
+	return ((val - avg) >> 1) + avg;
+}
+
+static void zone_adj_pressure(struct zone *zone, int priority)
+{
+	zone->pressure = expavg(zone->pressure,
+			(DEF_PRIORITY - priority) << 10);
+}
+
+/*
  * The list of shrinker callbacks used by to apply pressure to
  * ageable caches.
  */
@@ -794,8 +808,10 @@ shrink_caches(struct zone *classzone, in
 		ret += shrink_zone(zone, max_scan, gfp_mask,
 				to_reclaim, &nr_mapped, ps, priority);
 		*total_scanned += max_scan + nr_mapped;
-		if (ret >= nr_pages)
+		if (ret >= nr_pages) {
+			zone_adj_pressure(zone, priority);
 			break;
+		}
 	}
 	return ret;
 }
@@ -824,6 +840,7 @@ int try_to_free_pages(struct zone *cz,
 	int ret = 0;
 	const int nr_pages = SWAP_CLUSTER_MAX;
 	int nr_reclaimed = 0;
+	struct zone *zone;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 
 	inc_page_state(allocstall);
@@ -860,6 +877,8 @@ int try_to_free_pages(struct zone *cz,
 	}
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
 		out_of_memory();
+	for (zone = cz; zone >= cz->zone_pgdat->node_zones; -- zone)
+		zone_adj_pressure(zone, -1);
 out:
 	return ret;
 }
@@ -907,8 +926,10 @@ static int balance_pgdat(pg_data_t *pgda
 				to_reclaim = min(to_free, SWAP_CLUSTER_MAX*8);
 			} else {			/* Zone balancing */
 				to_reclaim = zone->pages_high-zone->free_pages;
-				if (to_reclaim <= 0)
+				if (to_reclaim <= 0) {
+					zone_adj_pressure(zone, priority);
 					continue;
+				}
 			}
 			all_zones_ok = 0;
 			max_scan = zone->nr_inactive >> priority;
@@ -933,6 +954,14 @@ static int balance_pgdat(pg_data_t *pgda
 		if (to_free)
 			blk_congestion_wait(WRITE, HZ/10);
 	}
+	if (priority < 0) {
+		for (i = 0; i < pgdat->nr_zones; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+
+			if (zone->free_pages < zone->pages_high)
+				zone_adj_pressure(zone, -1);
+		}
+	}
 	return nr_pages - to_free;
 }
 

_