<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">
From: Nick Piggin &lt;piggin@cyberone.com.au&gt;

This one gathers better statistics about the new process problem.  It
improves estimation for initial process IO.  That is, better calculations
for whether it will be worth waiting after a process submits its first
read.

This is done with a per-queue average thinktime and seek time, measured on the
second read submitted by a process.

When combined with 3/3, the numbers are around the same as mm1 for most long
lived tasks, but much better for things like the top four benchmarks below.

It probably wants rwhron and the OSDL database guys to give it some testing.

test                                    2.6.0-test9-mm1  2.6.0-test9-mm1-np
Cat kernel source during seq read       0:26.89          0:24.75
Cat kernel source during seq write      9:17.80          0:23.48
ls -lr kernel source during seq read    0:11.03          0:14.68
ls -lr kernel source during seq write   0:49.95          0:08.06

contest no_load                         143s 0 loads     144s 0 loads
contest io_load                         193s 40.2 loads  193s 40.1 loads
contest read_load                       186s 11.6 loads  190s 10.5 loads
contest list_load                       201s 5.0 loads   200s 5.0 loads

pgbench 1 client                        31.3TPS          31.5TPS
pgbench 4 clients                       37.7TPS          37.5TPS
pgbench 16 clients                      42.1TPS          48.1TPS




 25-akpm/drivers/block/as-iosched.c |  209 +++++++++++++++++++++++--------------
 1 files changed, 131 insertions(+), 78 deletions(-)

diff -puN drivers/block/as-iosched.c~as-new-process-estimation drivers/block/as-iosched.c
--- 25/drivers/block/as-iosched.c~as-new-process-estimation	Fri Nov  7 15:25:38 2003
+++ 25-akpm/drivers/block/as-iosched.c	Fri Nov  7 15:25:38 2003
@@ -70,6 +70,7 @@
 /* Bits in as_io_context.state */
 enum as_io_states {
 	AS_TASK_RUNNING=0,	/* Process has not exitted */
+	AS_TASK_IOSTARTED,	/* Process has started some IO */
 	AS_TASK_IORUNNING,	/* Process has completed some IO */
 };
 
@@ -99,6 +100,14 @@ struct as_data {
 	sector_t last_sector[2];	/* last REQ_SYNC &amp; REQ_ASYNC sectors */
 	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
+
+	unsigned long exit_prob;	/* probability a task will exit while
+					   being waited on */
+	unsigned long new_ttime_total; 	/* mean thinktime on new proc */
+	unsigned long new_ttime_mean;
+	u64 new_seek_total;		/* mean seek on new proc */
+	sector_t new_seek_mean;
+
 	unsigned long current_batch_expires;
 	unsigned long last_check_fifo[2];
 	int changed_batch;		/* 1: waiting for old batch to end */
@@ -186,6 +195,7 @@ static void free_as_io_context(struct as
 /* Called when the task exits */
 static void exit_as_io_context(struct as_io_context *aic)
 {
+	WARN_ON(!test_bit(AS_TASK_RUNNING, &amp;aic-&gt;state));
 	clear_bit(AS_TASK_RUNNING, &amp;aic-&gt;state);
 }
 
@@ -608,8 +618,15 @@ static void as_antic_timeout(unsigned lo
 	spin_lock_irqsave(q-&gt;queue_lock, flags);
 	if (ad-&gt;antic_status == ANTIC_WAIT_REQ
 			|| ad-&gt;antic_status == ANTIC_WAIT_NEXT) {
+		struct as_io_context *aic = ad-&gt;io_context-&gt;aic;
+
 		ad-&gt;antic_status = ANTIC_FINISHED;
 		kblockd_schedule_work(&amp;ad-&gt;antic_work);
+
+		if (aic-&gt;ttime_samples == 0) {
+			/* process anticipated on has exitted or timed out*/
+			ad-&gt;exit_prob = (7*ad-&gt;exit_prob + 256)/8;
+		}
 	}
 	spin_unlock_irqrestore(q-&gt;queue_lock, flags);
 }
@@ -623,7 +640,7 @@ static int as_close_req(struct as_data *
 	unsigned long delay;	/* milliseconds */
 	sector_t last = ad-&gt;last_sector[ad-&gt;batch_data_dir];
 	sector_t next = arq-&gt;request-&gt;sector;
-	sector_t delta;	/* acceptable close offset (in sectors) */
+	sector_t delta; /* acceptable close offset (in sectors) */
 
 	if (ad-&gt;antic_status == ANTIC_OFF || !ad-&gt;ioc_finished)
 		delay = 0;
@@ -657,6 +674,15 @@ static int as_can_break_anticipation(str
 {
 	struct io_context *ioc;
 	struct as_io_context *aic;
+	sector_t s;
+
+	ioc = ad-&gt;io_context;
+	BUG_ON(!ioc);
+
+	if (arq &amp;&amp; ioc == arq-&gt;io_context) {
+		/* request from same process */
+		return 1;
+	}
 
 	if (arq &amp;&amp; arq-&gt;is_sync == REQ_SYNC &amp;&amp; as_close_req(ad, arq)) {
 		/* close request */
@@ -671,20 +697,14 @@ static int as_can_break_anticipation(str
 		return 1;
 	}
 
-	ioc = ad-&gt;io_context;
-	BUG_ON(!ioc);
-
-	if (arq &amp;&amp; ioc == arq-&gt;io_context) {
-		/* request from same process */
-		return 1;
-	}
-
 	aic = ioc-&gt;aic;
 	if (!aic)
 		return 0;
 
 	if (!test_bit(AS_TASK_RUNNING, &amp;aic-&gt;state)) {
 		/* process anticipated on has exitted */
+		if (aic-&gt;ttime_samples == 0)
+			ad-&gt;exit_prob = (7*ad-&gt;exit_prob + 256)/8;
 		return 1;
 	}
 
@@ -698,27 +718,36 @@ static int as_can_break_anticipation(str
 		return 1;
 	}
 
-	if (aic-&gt;seek_samples == 0 || aic-&gt;ttime_samples == 0) {
-		/*
-		 * Process has just started IO. Don't anticipate.
-		 * TODO! Must fix this up.
-		 */
-		return 1;
-	}
-
-	if (aic-&gt;ttime_mean &gt; ad-&gt;antic_expire) {
+	if (aic-&gt;ttime_samples == 0) {
+		if (ad-&gt;new_ttime_mean &gt; ad-&gt;antic_expire)
+			return 1;
+		if (ad-&gt;exit_prob &gt; 128)
+			return 1;
+	} else if (aic-&gt;ttime_mean &gt; ad-&gt;antic_expire) {
 		/* the process thinks too much between requests */
 		return 1;
 	}
 
-	if (arq &amp;&amp; aic-&gt;seek_samples) {
-		sector_t s;
-		if (ad-&gt;last_sector[REQ_SYNC] &lt; arq-&gt;request-&gt;sector)
-			s = arq-&gt;request-&gt;sector - ad-&gt;last_sector[REQ_SYNC];
-		else
-			s = ad-&gt;last_sector[REQ_SYNC] - arq-&gt;request-&gt;sector;
+	if (!arq)
+		return 0;
+
+	if (ad-&gt;last_sector[REQ_SYNC] &lt; arq-&gt;request-&gt;sector)
+		s = arq-&gt;request-&gt;sector - ad-&gt;last_sector[REQ_SYNC];
+	else
+		s = ad-&gt;last_sector[REQ_SYNC] - arq-&gt;request-&gt;sector;
+
+	if (aic-&gt;seek_samples == 0) {
+		/*
+		 * Process has just started IO. Use past statistics to
+		 * guage success possibility
+		 */
+		if (ad-&gt;new_seek_mean/2 &gt; s) {
+			/* this request is better than what we're expecting */
+			return 1;
+		}
 
-		if (aic-&gt;seek_mean &gt; (s&gt;&gt;1)) {
+	} else {
+		if (aic-&gt;seek_mean/2 &gt; s) {
 			/* this request is better than what we're expecting */
 			return 1;
 		}
@@ -763,12 +792,51 @@ static int as_can_anticipate(struct as_d
 	return 1;
 }
 
+static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic, unsigned long ttime)
+{
+	/* fixed point: 1.0 == 1&lt;&lt;8 */
+	if (aic-&gt;ttime_samples == 0) {
+		ad-&gt;new_ttime_total = (7*ad-&gt;new_ttime_total + 256*ttime) / 8;
+		ad-&gt;new_ttime_mean = ad-&gt;new_ttime_total / 256;
+
+		ad-&gt;exit_prob = (7*ad-&gt;exit_prob)/8;
+	}
+	aic-&gt;ttime_samples = (7*aic-&gt;ttime_samples + 256) / 8;
+	aic-&gt;ttime_total = (7*aic-&gt;ttime_total + 256*ttime) / 8;
+	aic-&gt;ttime_mean = (aic-&gt;ttime_total + 128) / aic-&gt;ttime_samples;
+}
+
+static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, sector_t sdist)
+{
+	u64 total;
+
+	if (aic-&gt;seek_samples == 0) {
+		ad-&gt;new_seek_total = (7*ad-&gt;new_seek_total + 256*(u64)sdist)/8;
+		ad-&gt;new_seek_mean = ad-&gt;new_seek_total / 256;
+	}
+
+	/*
+	 * Don't allow the seek distance to get too large from the
+	 * odd fragment, pagein, etc
+	 */
+	if (aic-&gt;seek_samples &lt;= 60) /* second&amp;third seek */
+		sdist = min(sdist, (aic-&gt;seek_mean * 4) + 2*1024*1024);
+	else
+		sdist = min(sdist, (aic-&gt;seek_mean * 4)	+ 2*1024*64);
+
+	aic-&gt;seek_samples = (7*aic-&gt;seek_samples + 256) / 8;
+	aic-&gt;seek_total = (7*aic-&gt;seek_total + (u64)256*sdist) / 8;
+	total = aic-&gt;seek_total + (aic-&gt;seek_samples/2);
+	do_div(total, aic-&gt;seek_samples);
+	aic-&gt;seek_mean = (sector_t)total;
+}
+
 /*
  * as_update_iohist keeps a decaying histogram of IO thinktimes, and
  * updates @aic-&gt;ttime_mean based on that. It is called when a new
  * request is queued.
  */
-static void as_update_iohist(struct as_io_context *aic, struct request *rq)
+static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, struct request *rq)
 {
 	struct as_rq *arq = RQ_DATA(rq);
 	int data_dir = arq-&gt;is_sync;
@@ -779,60 +847,29 @@ static void as_update_iohist(struct as_i
 		return;
 
 	if (data_dir == REQ_SYNC) {
+		unsigned long in_flight = atomic_read(&amp;aic-&gt;nr_queued)
+					+ atomic_read(&amp;aic-&gt;nr_dispatched);
 		spin_lock(&amp;aic-&gt;lock);
-
-		if (test_bit(AS_TASK_IORUNNING, &amp;aic-&gt;state)
-				&amp;&amp; !atomic_read(&amp;aic-&gt;nr_queued)
-				&amp;&amp; !atomic_read(&amp;aic-&gt;nr_dispatched)) {
+		if (test_bit(AS_TASK_IORUNNING, &amp;aic-&gt;state) ||
+			test_bit(AS_TASK_IOSTARTED, &amp;aic-&gt;state)) {
 			/* Calculate read -&gt; read thinktime */
-			thinktime = jiffies - aic-&gt;last_end_request;
-			thinktime = min(thinktime, MAX_THINKTIME-1);
-			/* fixed point: 1.0 == 1&lt;&lt;8 */
-			aic-&gt;ttime_samples += 256;
-			aic-&gt;ttime_total += 256*thinktime;
-			if (aic-&gt;ttime_samples)
-				/* fixed point factor is cancelled here */
-				aic-&gt;ttime_mean = (aic-&gt;ttime_total + 128)
-							/ aic-&gt;ttime_samples;
-			aic-&gt;ttime_samples = (aic-&gt;ttime_samples&gt;&gt;1)
-						+ (aic-&gt;ttime_samples&gt;&gt;2);
-			aic-&gt;ttime_total = (aic-&gt;ttime_total&gt;&gt;1)
-						+ (aic-&gt;ttime_total&gt;&gt;2);
-		}
-
-		/* Calculate read -&gt; read seek distance */
-		if (!aic-&gt;seek_samples)
-			seek_dist = 0;
-		else if (aic-&gt;last_request_pos &lt; rq-&gt;sector)
-			seek_dist = rq-&gt;sector - aic-&gt;last_request_pos;
-		else
-			seek_dist = aic-&gt;last_request_pos - rq-&gt;sector;
-
+			if (test_bit(AS_TASK_IORUNNING, &amp;aic-&gt;state)
+							&amp;&amp; in_flight == 0) {
+				thinktime = jiffies - aic-&gt;last_end_request;
+				thinktime = min(thinktime, MAX_THINKTIME-1);
+			} else
+				thinktime = 0;
+			as_update_thinktime(ad, aic, thinktime);
+
+			/* Calculate read -&gt; read seek distance */
+			if (aic-&gt;last_request_pos &lt; rq-&gt;sector)
+				seek_dist = rq-&gt;sector - aic-&gt;last_request_pos;
+			else
+				seek_dist = aic-&gt;last_request_pos - rq-&gt;sector;
+			as_update_seekdist(ad, aic, seek_dist);
+		}
 		aic-&gt;last_request_pos = rq-&gt;sector + rq-&gt;nr_sectors;
-
-		/*
-		 * Don't allow the seek distance to get too large from the
-		 * odd fragment, pagein, etc
-		 */
-		if (aic-&gt;seek_samples &lt; 400) /* second&amp;third seek */
-			seek_dist = min(seek_dist, (aic-&gt;seek_mean * 4)
-							+ 2*1024*1024);
-		else
-			seek_dist = min(seek_dist, (aic-&gt;seek_mean * 4)
-							+ 2*1024*64);
-
-		aic-&gt;seek_samples += 256;
-		aic-&gt;seek_total += (u64)256*seek_dist;
-		if (aic-&gt;seek_samples) {
-			u64 total = aic-&gt;seek_total + (aic-&gt;seek_samples&gt;&gt;1);
-			do_div(total, aic-&gt;seek_samples);
-			aic-&gt;seek_mean = (sector_t)total;
-		}
-		aic-&gt;seek_samples = (aic-&gt;seek_samples&gt;&gt;1)
-					+ (aic-&gt;seek_samples&gt;&gt;2);
-		aic-&gt;seek_total = (aic-&gt;seek_total&gt;&gt;1)
-					+ (aic-&gt;seek_total&gt;&gt;2);
-
+		set_bit(AS_TASK_IOSTARTED, &amp;aic-&gt;state);
 		spin_unlock(&amp;aic-&gt;lock);
 	}
 }
@@ -1376,8 +1413,8 @@ static void as_add_request(struct as_dat
 	arq-&gt;io_context = as_get_io_context();
 
 	if (arq-&gt;io_context) {
+		as_update_iohist(ad, arq-&gt;io_context-&gt;aic, arq-&gt;request);
 		atomic_inc(&amp;arq-&gt;io_context-&gt;aic-&gt;nr_queued);
-		as_update_iohist(arq-&gt;io_context-&gt;aic, arq-&gt;request);
 	}
 
 	alias = as_add_arq_rb(ad, arq);
@@ -1885,6 +1922,17 @@ as_var_store(unsigned long *var, const c
 	return count;
 }
 
+static ssize_t as_est_show(struct as_data *ad, char *page)
+{
+	int pos = 0;
+
+	pos += sprintf(page+pos, "%lu %% exit probability\n", 100*ad-&gt;exit_prob/256);
+	pos += sprintf(page+pos, "%lu ms new thinktime\n", ad-&gt;new_ttime_mean);
+	pos += sprintf(page+pos, "%llu sectors new seek distance\n", (unsigned long long)ad-&gt;new_seek_mean);
+
+	return pos;
+}
+
 #define SHOW_FUNCTION(__FUNC, __VAR)					\
 static ssize_t __FUNC(struct as_data *ad, char *page)		\
 {									\
@@ -1916,6 +1964,10 @@ STORE_FUNCTION(as_write_batchexpire_stor
 			&amp;ad-&gt;batch_expire[REQ_ASYNC], 0, INT_MAX);
 #undef STORE_FUNCTION
 
+static struct as_fs_entry as_est_entry = {
+	.attr = {.name = "est_time", .mode = S_IRUGO },
+	.show = as_est_show,
+};
 static struct as_fs_entry as_readexpire_entry = {
 	.attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR },
 	.show = as_readexpire_show,
@@ -1943,6 +1995,7 @@ static struct as_fs_entry as_write_batch
 };
 
 static struct attribute *default_attrs[] = {
+	&amp;as_est_entry.attr,
 	&amp;as_readexpire_entry.attr,
 	&amp;as_writeexpire_entry.attr,
 	&amp;as_anticexpire_entry.attr,

_
</pre></body></html>