diff -urN linux-2.4.18-preempt/Documentation/Configure.help linux/Documentation/Configure.help
--- linux-2.4.18-preempt/Documentation/Configure.help	Mon Feb 25 17:30:17 2002
+++ linux/Documentation/Configure.help	Mon Feb 25 17:49:43 2002
@@ -277,6 +277,18 @@
   Say Y here if you are building a kernel for a desktop, embedded or
   real-time system.  Say N if you are unsure.
 
+Break Selected Locks
+CONFIG_LOCK_BREAK
+  This option breaks up selected long-held locks in high-latency
+  regions throughout the kernel.  It is intended for use in
+  conjunction with the preemptible kernel (CONFIG_PREEMPT).  Since
+  in-kernel preemption cannot occur while locks are held, temporarily
+  releasing and then reacquiring them further improves system response.
+
+  Say Y if you are compiling for a system with strict latency
+  requirements such as an embedded, real-time, or audio processing
+  system.  Say N otherwise.
+
 Kernel math emulation
 CONFIG_MATH_EMULATION
   Linux can emulate a math coprocessor (used for floating point
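
(Not part of the patch itself: the lock-break pattern this option enables
looks roughly like the sketch below.  The lock, list and function names
are invented for illustration; the macros are the ones introduced by this
patch in include/linux/lock_break.h, pulled in via <linux/sched.h>.)

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>

/* everything here except the lock_break macros is made up */
static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(example_list);

static void example_walk(void)
{
	struct list_head *p;

	spin_lock(&example_lock);
restart:
	list_for_each(p, &example_list) {
		if (conditional_schedule_needed()) {
			/* drop the lock, schedule if needed, reacquire */
			break_spin_lock_and_resched(&example_lock);
			/* the list may have changed while unlocked */
			goto restart;
		}
		/* ... per-entry work goes here ... */
	}
	spin_unlock(&example_lock);
}
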
diff -urN linux-2.4.18-preempt/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.4.18-preempt/arch/i386/config.in	Mon Feb 25 17:29:57 2002
+++ linux/arch/i386/config.in	Mon Feb 25 17:49:43 2002
@@ -186,6 +186,9 @@
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
 bool 'Preemptible Kernel' CONFIG_PREEMPT
+if [ "$CONFIG_PREEMPT" = "y" ]; then
+   bool 'Break selected locks' CONFIG_LOCK_BREAK
+fi
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
    dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
diff -urN linux-2.4.18-preempt/arch/sh/config.in linux/arch/sh/config.in
--- linux-2.4.18-preempt/arch/sh/config.in	Mon Feb 25 17:30:10 2002
+++ linux/arch/sh/config.in	Mon Feb 25 17:49:43 2002
@@ -125,6 +125,7 @@
    hex 'Physical memory size' CONFIG_MEMORY_SIZE 00400000
 fi
 bool 'Preemptible Kernel' CONFIG_PREEMPT
+dep_bool 'Break selected locks' CONFIG_LOCK_BREAK $CONFIG_PREEMPT
 endmenu
 
 if [ "$CONFIG_SH_HP690" = "y" ]; then
diff -urN linux-2.4.18-preempt/drivers/char/mem.c linux/drivers/char/mem.c
--- linux-2.4.18-preempt/drivers/char/mem.c	Mon Feb 25 17:29:32 2002
+++ linux/drivers/char/mem.c	Mon Feb 25 17:49:43 2002
@@ -400,7 +400,7 @@
 		if (count > size)
 			count = size;
 
-		zap_page_range(mm, addr, count);
+		zap_page_range(mm, addr, count, ZPR_NORMAL);
         	zeromap_page_range(addr, count, PAGE_COPY);
 
 		size -= count;
diff -urN linux-2.4.18-preempt/drivers/char/tty_io.c linux/drivers/char/tty_io.c
--- linux-2.4.18-preempt/drivers/char/tty_io.c	Mon Feb 25 17:29:32 2002
+++ linux/drivers/char/tty_io.c	Mon Feb 25 17:49:43 2002
@@ -727,6 +727,7 @@
 			ret = -ERESTARTSYS;
 			if (signal_pending(current))
 				break;
+			debug_lock_break(551);
 			if (current->need_resched)
 				schedule();
 		}
diff -urN linux-2.4.18-preempt/fs/buffer.c linux/fs/buffer.c
--- linux-2.4.18-preempt/fs/buffer.c	Mon Feb 25 17:29:12 2002
+++ linux/fs/buffer.c	Mon Feb 25 17:49:43 2002
@@ -262,6 +262,11 @@
 		}
 		if (dev && bh->b_dev != dev)
 			continue;
+		if (conditional_schedule_needed()) {
+			debug_lock_break(1);
+			spin_unlock(&lru_list_lock);
+			return -EAGAIN;
+		}
 
 		get_bh(bh);
 		spin_unlock(&lru_list_lock);
@@ -682,6 +687,13 @@
 			/* Not hashed? */
 			if (!bh->b_pprev)
 				continue;
+			if (conditional_schedule_needed()) {
+				debug_lock_break(2); /* bkl is held too */
+				get_bh(bh);
+				break_spin_lock_and_resched(&lru_list_lock);
+				put_bh(bh);
+				slept = 1;
+			}
 			if (buffer_locked(bh)) {
 				get_bh(bh);
 				spin_unlock(&lru_list_lock);
@@ -833,6 +845,8 @@
 	struct buffer_head *bh;
 	struct inode tmp;
 	int err = 0, err2;
+
+	DEFINE_LOCK_COUNT();
 	
 	INIT_LIST_HEAD(&tmp.i_dirty_buffers);
 	
@@ -854,6 +868,12 @@
 				spin_lock(&lru_list_lock);
 			}
 		}
+		/* haven't hit this code path ... */
+		debug_lock_break(551);
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			break_spin_lock(&lru_list_lock);
+		}
 	}
 
 	while (!list_empty(&tmp.i_dirty_buffers)) {
@@ -883,6 +903,7 @@
 	struct inode tmp;
 	int err = 0, err2;
 	
+	DEFINE_LOCK_COUNT();
 	INIT_LIST_HEAD(&tmp.i_dirty_data_buffers);
 	
 	spin_lock(&lru_list_lock);
@@ -914,9 +935,14 @@
 		if (!buffer_uptodate(bh))
 			err = -EIO;
 		brelse(bh);
+		debug_lock_break(1);
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			conditional_schedule();
+		}
 		spin_lock(&lru_list_lock);
 	}
-	
+
 	spin_unlock(&lru_list_lock);
 	err2 = osync_inode_data_buffers(inode);
 
@@ -943,6 +969,8 @@
 	struct list_head *list;
 	int err = 0;
 
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&lru_list_lock);
 	
  repeat:
@@ -950,6 +978,17 @@
 	for (list = inode->i_dirty_buffers.prev; 
 	     bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
 	     list = bh->b_inode_buffers.prev) {
+		/* untested code path ... */
+		debug_lock_break(551);
+
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&lru_list_lock);
+				goto repeat;
+			}
+		}
+
 		if (buffer_locked(bh)) {
 			get_bh(bh);
 			spin_unlock(&lru_list_lock);
diff -urN linux-2.4.18-preempt/fs/dcache.c linux/fs/dcache.c
--- linux-2.4.18-preempt/fs/dcache.c	Mon Feb 25 17:29:12 2002
+++ linux/fs/dcache.c	Mon Feb 25 17:49:43 2002
@@ -320,11 +320,24 @@
  
 void prune_dcache(int count)
 {
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&dcache_lock);
+
+redo:
 	for (;;) {
 		struct dentry *dentry;
 		struct list_head *tmp;
 
+		if (TEST_LOCK_COUNT(100)) {
+			RESET_LOCK_COUNT();
+			debug_lock_break(1);
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&dcache_lock);
+				goto redo;
+			}
+		}
+
 		tmp = dentry_unused.prev;
 
 		if (tmp == &dentry_unused)
@@ -480,6 +493,8 @@
 	struct list_head *next;
 	int found = 0;
 
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&dcache_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
@@ -493,6 +508,12 @@
 			list_add(&dentry->d_lru, dentry_unused.prev);
 			found++;
 		}
+		if (TEST_LOCK_COUNT(500) && found > 10) {
+			debug_lock_break(1);
+			if (conditional_schedule_needed())
+				goto out;
+			RESET_LOCK_COUNT();
+		}
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
@@ -517,6 +538,7 @@
 #endif
 		goto resume;
 	}
+out:
 	spin_unlock(&dcache_lock);
 	return found;
 }
diff -urN linux-2.4.18-preempt/fs/ext3/inode.c linux/fs/ext3/inode.c
--- linux-2.4.18-preempt/fs/ext3/inode.c	Mon Feb 25 17:29:15 2002
+++ linux/fs/ext3/inode.c	Mon Feb 25 17:50:36 2002
@@ -1649,6 +1649,8 @@
 	}
 
 	for (p = first; p < last; p++) {
+		debug_lock_break(1); /* bkl is held */
+		conditional_schedule();
 		nr = le32_to_cpu(*p);
 		if (nr) {
 			/* accumulate blocks to free if they're contiguous */
@@ -1714,6 +1716,9 @@
 			/* Go read the buffer for the next level down */
 			bh = sb_bread(inode->i_sb, nr);
 
+			debug_lock_break(1);
+			conditional_schedule();
+
 			/*
 			 * A read failure? Report error and clear slot
 			 * (should be rare).
diff -urN linux-2.4.18-preempt/fs/ext3/namei.c linux/fs/ext3/namei.c
--- linux-2.4.18-preempt/fs/ext3/namei.c	Mon Feb 25 17:29:15 2002
+++ linux/fs/ext3/namei.c	Mon Feb 25 17:49:43 2002
@@ -157,6 +157,8 @@
 		if ((bh = bh_use[ra_ptr++]) == NULL)
 			goto next;
 		wait_on_buffer(bh);
+		debug_lock_break(1);
+		conditional_schedule();
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
 			brelse(bh);
diff -urN linux-2.4.18-preempt/fs/inode.c linux/fs/inode.c
--- linux-2.4.18-preempt/fs/inode.c	Mon Feb 25 17:29:12 2002
+++ linux/fs/inode.c	Mon Feb 25 17:49:43 2002
@@ -567,6 +567,12 @@
 		if (tmp == head)
 			break;
 		inode = list_entry(tmp, struct inode, i_list);
+
+		debug_lock_break(2); /* bkl is also held */
+		atomic_inc(&inode->i_count);
+		break_spin_lock_and_resched(&inode_lock);
+		atomic_dec(&inode->i_count);
+
 		if (inode->i_sb != sb)
 			continue;
 		invalidate_inode_buffers(inode);
@@ -668,8 +674,11 @@
 	int count;
 	struct inode * inode;
 
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&inode_lock);
 
+free_unused:
 	count = 0;
 	entry = inode_unused.prev;
 	while (entry != &inode_unused)
@@ -692,6 +701,14 @@
 		count++;
 		if (!--goal)
 			break;
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			debug_lock_break(1);
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&inode_lock);
+				goto free_unused;
+			}
+		}
 	}
 	inodes_stat.nr_unused -= count;
 	spin_unlock(&inode_lock);
diff -urN linux-2.4.18-preempt/fs/jbd/commit.c linux/fs/jbd/commit.c
--- linux-2.4.18-preempt/fs/jbd/commit.c	Mon Feb 25 17:29:15 2002
+++ linux/fs/jbd/commit.c	Mon Feb 25 17:49:43 2002
@@ -212,6 +212,9 @@
 				__journal_remove_journal_head(bh);
 				refile_buffer(bh);
 				__brelse(bh);
+				debug_lock_break(2);
+				if (conditional_schedule_needed())
+					break;
 			}
 		}
 		if (bufs == ARRAY_SIZE(wbuf)) {
@@ -235,8 +238,7 @@
 		journal_brelse_array(wbuf, bufs);
 		lock_journal(journal);
 		spin_lock(&journal_datalist_lock);
-		if (bufs)
-			goto write_out_data_locked;
+		goto write_out_data_locked;
 	}
 
 	/*
@@ -272,6 +274,14 @@
 	 */
 	while ((jh = commit_transaction->t_async_datalist)) {
 		struct buffer_head *bh = jh2bh(jh);
+		if (conditional_schedule_needed()) {
+			debug_lock_break(551);
+			spin_unlock(&journal_datalist_lock);
+			unlock_journal(journal);
+			lock_journal(journal);
+			spin_lock(&journal_datalist_lock);
+			continue;
+		}
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal_datalist_lock);
 			unlock_journal(journal);
diff -urN linux-2.4.18-preempt/fs/reiserfs/bitmap.c linux/fs/reiserfs/bitmap.c
--- linux-2.4.18-preempt/fs/reiserfs/bitmap.c	Mon Feb 25 17:29:14 2002
+++ linux/fs/reiserfs/bitmap.c	Mon Feb 25 17:49:43 2002
@@ -410,19 +410,23 @@
 	amount_needed++ ;
 	continue ;
     }
-       
 
-    reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ;
+    RFALSE( is_reusable (s, search_start, 0) == 0,
+            "vs-4140: bad block number found");
 
-    RFALSE( buffer_locked (SB_AP_BITMAP (s)[i]) || 
-	    is_reusable (s, search_start, 0) == 0,
-	    "vs-4140: bitmap block is locked or bad block number found");
+    reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ;
 
     /* if this bit was already set, we've scheduled, and someone else
     ** has allocated it.  loop around and try again
     */
     if (reiserfs_test_and_set_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)) {
 	reiserfs_restore_prepared_buffer(s, SB_AP_BITMAP(s)[i]) ;
+	/* if this block has been allocated while we slept, it is
+	** impossible to find any more contiguous blocks for ourselves.
+	** If we are doing preallocation, give up now and return.
+	*/
+	if (for_prealloc)
+	    goto free_and_return;
 	amount_needed++ ;
 	continue ;
     }    
diff -urN linux-2.4.18-preempt/fs/reiserfs/buffer2.c linux/fs/reiserfs/buffer2.c
--- linux-2.4.18-preempt/fs/reiserfs/buffer2.c	Mon Feb 25 17:29:14 2002
+++ linux/fs/reiserfs/buffer2.c	Mon Feb 25 17:49:43 2002
@@ -55,6 +55,8 @@
     PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
 
     result = bread (super -> s_dev, n_block, n_size);
+    debug_lock_break(1);
+    conditional_schedule();
     PROC_INFO_INC( super, breads );
     PROC_EXP( if( kstat.context_swtch != ctx_switches ) 
 	      PROC_INFO_INC( super, bread_miss ) );
diff -urN linux-2.4.18-preempt/fs/reiserfs/journal.c linux/fs/reiserfs/journal.c
--- linux-2.4.18-preempt/fs/reiserfs/journal.c	Mon Feb 25 17:29:14 2002
+++ linux/fs/reiserfs/journal.c	Mon Feb 25 17:51:15 2002
@@ -574,6 +574,8 @@
 /* lock the current transaction */
 inline static void lock_journal(struct super_block *p_s_sb) {
   PROC_INFO_INC( p_s_sb, journal.lock_journal );
+  debug_lock_break(1);
+  conditional_schedule();
   while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) {
     PROC_INFO_INC( p_s_sb, journal.lock_journal_wait );
     sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
@@ -704,6 +706,8 @@
 	mark_buffer_dirty(tbh) ;
       }
       ll_rw_block(WRITE, 1, &tbh) ;
+      debug_lock_break(1);
+      conditional_schedule();
       count++ ;
       put_bh(tbh) ; /* once for our get_hash */
     } 
@@ -833,6 +837,8 @@
     set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ;
     ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ;
     wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; 
+    debug_lock_break(1);
+    conditional_schedule();
     if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) {
       printk( "reiserfs: journal-837: IO error during journal replay\n" );
       return -EIO ;
@@ -2089,6 +2095,8 @@
 }
 
 int journal_begin(struct reiserfs_transaction_handle *th, struct super_block  * p_s_sb, unsigned long nblocks) {
+  debug_lock_break(1);
+  conditional_schedule();
   return do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
 }
 
@@ -2229,6 +2237,8 @@
 }
 
 int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
+  debug_lock_break(1);
+  conditional_schedule();
   return do_journal_end(th, p_s_sb, nblocks, 0) ;
 }
 
@@ -2680,6 +2690,8 @@
       RFALSE( buffer_locked(bh) && cur_tb != NULL,
 	      "waiting while do_balance was running\n") ;
       wait_on_buffer(bh) ;
+      debug_lock_break(1);
+      conditional_schedule();
     }
     PROC_INFO_INC( p_s_sb, journal.prepare_retry );
     retry_count++ ;
@@ -2852,6 +2864,8 @@
     /* copy all the real blocks into log area.  dirty log blocks */
     if (test_bit(BH_JDirty, &cn->bh->b_state)) {
       struct buffer_head *tmp_bh ;
+      debug_lock_break(1);
+      conditional_schedule();
       tmp_bh = sb_getblk(p_s_sb, reiserfs_get_journal_block(p_s_sb) + 
 		     ((cur_write_start + jindex) % JOURNAL_BLOCK_COUNT)) ;
       mark_buffer_uptodate(tmp_bh, 1) ;
diff -urN linux-2.4.18-preempt/fs/reiserfs/stree.c linux/fs/reiserfs/stree.c
--- linux-2.4.18-preempt/fs/reiserfs/stree.c	Mon Feb 25 17:29:14 2002
+++ linux/fs/reiserfs/stree.c	Mon Feb 25 17:49:43 2002
@@ -648,9 +648,8 @@
                                        stop at leaf level - set to
                                        DISK_LEAF_NODE_LEVEL */
     ) {
-    int  n_block_number = SB_ROOT_BLOCK (p_s_sb),
-      expected_level = SB_TREE_HEIGHT (p_s_sb),
-      n_block_size    = p_s_sb->s_blocksize;
+    int n_block_number, expected_level;
+    int n_block_size    = p_s_sb->s_blocksize;
     struct buffer_head  *       p_s_bh;
     struct path_element *       p_s_last_element;
     int				n_node_level, n_retval;
@@ -662,7 +661,10 @@
 #endif
     
     PROC_INFO_INC( p_s_sb, search_by_key );
-    
+
+    debug_lock_break(1);
+    conditional_schedule();
+
     /* As we add each node to a path we increase its count.  This means that
        we must be careful to release all nodes in a path before we either
        discard the path struct or re-use the path struct, as we do here. */
@@ -674,6 +676,8 @@
     /* With each iteration of this loop we search through the items in the
        current node, and calculate the next current node(next path element)
        for the next iteration of this loop.. */
+    n_block_number = SB_ROOT_BLOCK (p_s_sb);
+    expected_level = SB_TREE_HEIGHT (p_s_sb);
     while ( 1 ) {
 
 #ifdef CONFIG_REISERFS_CHECK
@@ -1100,6 +1104,9 @@
 	    for (n_counter = *p_n_removed;
 		 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
 
+		debug_lock_break(1);
+		conditional_schedule();
+
 		if (item_moved (&s_ih, p_s_path)) {
 		    need_research = 1 ;
 		    break;
diff -urN linux-2.4.18-preempt/include/linux/lock_break.h linux/include/linux/lock_break.h
--- linux-2.4.18-preempt/include/linux/lock_break.h	Wed Dec 31 19:00:00 1969
+++ linux/include/linux/lock_break.h	Mon Feb 25 17:49:43 2002
@@ -0,0 +1,84 @@
+/*
+ * include/linux/lock_break.h - lock breaking routines
+ *
+ * Since in-kernel preemption cannot occur while a lock is
+ * held, we can drop and reacquire long-held locks when they are
+ * in a natural quiescent state to further lower system latency.
+ *
+ * (C) 2001 Robert Love
+ *
+ */
+
+#ifndef _LINUX_LOCK_BREAK_H
+#define _LINUX_LOCK_BREAK_H
+
+#include <linux/compiler.h>
+
+/*
+ * Setting this to 1 instructs debug_lock_break to note when
+ * the expected lock count does not equal the actual count.
+ * If the actual count is higher than expected, we aren't
+ * dropping enough locks; if it is 0, we are wasting our time
+ * since the system is already preemptible at that point.
+ */
+#ifndef DEBUG_LOCK_BREAK
+#define DEBUG_LOCK_BREAK 0
+#endif
+
+#ifdef CONFIG_LOCK_BREAK
+
+#define conditional_schedule_needed() (unlikely(current->need_resched))
+
+/*
+ * Setting the task's state to TASK_RUNNING is nothing but paranoia
+ * for the case where a task is delinquent in properly putting itself
+ * to sleep.  We should test without it.
+ */
+#define unconditional_schedule() do { \
+	__set_current_state(TASK_RUNNING); \
+	schedule(); \
+} while(0)
+
+#define conditional_schedule() do { \
+	if (conditional_schedule_needed()) \
+		unconditional_schedule(); \
+} while(0)
+
+#define break_spin_lock(n) do { \
+	spin_unlock(n); \
+	spin_lock(n); \
+} while(0)
+
+#define break_spin_lock_and_resched(n) do { \
+	spin_unlock(n); \
+	conditional_schedule(); \
+	spin_lock(n); \
+} while(0)
+
+#if DEBUG_LOCK_BREAK
+#define debug_lock_break(n) do { \
+	if (current->preempt_count != n) \
+		printk(KERN_ERR "lock_break: %s:%d: count was %d not %d\n", \
+			__FILE__, __LINE__, current->preempt_count, n); \
+} while(0)
+#else
+#define debug_lock_break(n)
+#endif
+
+#define DEFINE_LOCK_COUNT() int _lock_break_count = 0
+#define TEST_LOCK_COUNT(n) (++_lock_break_count > (n))
+#define RESET_LOCK_COUNT() _lock_break_count = 0
+
+#else
+#define unconditional_schedule()
+#define conditional_schedule()
+#define conditional_schedule_needed() 0
+#define break_spin_lock(n)
+#define break_spin_lock_and_resched(n)
+#define debug_lock_break(n)
+#define DEFINE_LOCK_COUNT()
+#define TEST_LOCK_COUNT(n) 0
+#define RESET_LOCK_COUNT()
+#endif
+
+#endif /* _LINUX_LOCK_BREAK_H */
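
(Again not part of the patch: a sketch of how the counting macros above
are meant to be used, in the style of the fs/buffer.c, fs/dcache.c and
fs/inode.c changes below.  The function is a placeholder and it reuses
the made-up lock and list from the earlier sketch; the macros are the
real ones from this header.)

static void example_prune(void)
{
	struct list_head *p;
	DEFINE_LOCK_COUNT();

	spin_lock(&example_lock);
again:
	list_for_each(p, &example_list) {
		/* every 32 entries, check whether a preemption is pending */
		if (TEST_LOCK_COUNT(32)) {
			RESET_LOCK_COUNT();
			debug_lock_break(1);	/* expect one lock held here */
			if (conditional_schedule_needed()) {
				/* with CONFIG_PREEMPT, the unlock inside
				 * break_spin_lock is a preemption point */
				break_spin_lock(&example_lock);
				/* list may have changed; start over */
				goto again;
			}
		}
		/* ... per-entry work goes here ... */
	}
	spin_unlock(&example_lock);
}
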
diff -urN linux-2.4.18-preempt/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.4.18-preempt/include/linux/mm.h	Mon Feb 25 17:29:15 2002
+++ linux/include/linux/mm.h	Mon Feb 25 17:49:43 2002
@@ -121,6 +121,9 @@
  */
 extern pgprot_t protection_map[16];
 
+#define ZPR_MAX_BYTES (256*PAGE_SIZE)
+#define ZPR_NORMAL 0 /* perform zap_page_range request in one walk */
+#define ZPR_PARTITION 1 /* partition into a series of smaller operations */
 
 /*
  * These are the virtual MM functions - opening of an area, closing and
@@ -404,7 +407,7 @@
 extern void shmem_lock(struct file * file, int lock);
 extern int shmem_zero_setup(struct vm_area_struct *);
 
-extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
+extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions);
 extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
 extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
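
(Not part of the patch: an example call with the new zap_page_range()
interface.  example_unmap() and its vma argument are placeholders; the
flag values are the ones defined above and interpreted by the new
zap_page_range() in mm/memory.c further down.)

#include <linux/mm.h>

static void example_unmap(struct vm_area_struct *vma)
{
	/* ZPR_NORMAL zaps the whole range in a single walk of the page
	 * tables, exactly as the old interface did.  ZPR_PARTITION
	 * splits the request into ZPR_MAX_BYTES-sized pieces; each piece
	 * takes and drops mm->page_table_lock on its own, so on a large
	 * range the kernel gets a chance to reschedule in between. */
	zap_page_range(vma->vm_mm, vma->vm_start,
		       vma->vm_end - vma->vm_start, ZPR_PARTITION);
}
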
diff -urN linux-2.4.18-preempt/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.4.18-preempt/include/linux/sched.h	Mon Feb 25 17:29:15 2002
+++ linux/include/linux/sched.h	Mon Feb 25 17:49:43 2002
@@ -26,6 +26,7 @@
 #include <linux/signal.h>
 #include <linux/securebits.h>
 #include <linux/fs_struct.h>
+#include <linux/lock_break.h>
 
 struct exec_domain;
 
diff -urN linux-2.4.18-preempt/kernel/exit.c linux/kernel/exit.c
--- linux-2.4.18-preempt/kernel/exit.c	Mon Feb 25 17:29:15 2002
+++ linux/kernel/exit.c	Mon Feb 25 17:49:43 2002
@@ -203,6 +203,8 @@
 			}
 			i++;
 			set >>= 1;
+			debug_lock_break(1);
+			conditional_schedule();
 		}
 	}
 }
diff -urN linux-2.4.18-preempt/mm/filemap.c linux/mm/filemap.c
--- linux-2.4.18-preempt/mm/filemap.c	Mon Feb 25 17:29:15 2002
+++ linux/mm/filemap.c	Mon Feb 25 17:53:02 2002
@@ -296,6 +296,7 @@
 
 			page_cache_release(page);
 
+			/* we hit this with lock depth of 1 or 2 */
 			if (current->need_resched) {
 				__set_current_state(TASK_RUNNING);
 				schedule();
@@ -406,6 +407,8 @@
 		}
 
 		page_cache_release(page);
+
+		debug_lock_break(551);
 		if (current->need_resched) {
 			__set_current_state(TASK_RUNNING);
 			schedule();
@@ -560,12 +563,16 @@
 		list_del(&page->list);
 		list_add(&page->list, &mapping->locked_pages);
 
-		if (!PageDirty(page))
-			continue;
-
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
 
+		/* BKL is held ... */
+		debug_lock_break(1);
+		conditional_schedule();
+
+		if (!PageDirty(page))
+			goto clean;
+
 		lock_page(page);
 
 		if (PageDirty(page)) {
@@ -576,7 +583,7 @@
 				ret = err;
 		} else
 			UnlockPage(page);
-
+clean:
 		page_cache_release(page);
 		spin_lock(&pagecache_lock);
 	}
@@ -595,14 +602,28 @@
 {
 	int ret = 0;
 
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&pagecache_lock);
 
+restart:
         while (!list_empty(&mapping->locked_pages)) {
 		struct page *page = list_entry(mapping->locked_pages.next, struct page, list);
 
 		list_del(&page->list);
 		list_add(&page->list, &mapping->clean_pages);
 
+		debug_lock_break(2);
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			if (conditional_schedule_needed()) {
+				page_cache_get(page);
+				break_spin_lock_and_resched(&pagecache_lock);
+				page_cache_release(page);
+				goto restart;
+			}
+		}
+
 		if (!PageLocked(page))
 			continue;
 
@@ -869,6 +890,7 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
+	break_spin_lock(&pagecache_lock);
 	page = __find_page_nolock(mapping, offset, hash);
 	if (page) {
 		page_cache_get(page);
@@ -2031,6 +2053,8 @@
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+	debug_lock_break(1);
+	break_spin_lock(&vma->vm_mm->page_table_lock);
 	return error;
 }
 
@@ -2061,6 +2085,9 @@
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
+
+	debug_lock_break(1);
+	break_spin_lock(&vma->vm_mm->page_table_lock);
 	return error;
 }
 
@@ -2438,7 +2465,7 @@
 	if (vma->vm_flags & VM_LOCKED)
 		return -EINVAL;
 
-	zap_page_range(vma->vm_mm, start, end - start);
+	zap_page_range(vma->vm_mm, start, end - start, ZPR_PARTITION);
 	return 0;
 }
 
diff -urN linux-2.4.18-preempt/mm/memory.c linux/mm/memory.c
--- linux-2.4.18-preempt/mm/memory.c	Mon Feb 25 17:29:15 2002
+++ linux/mm/memory.c	Mon Feb 25 17:49:44 2002
@@ -355,7 +355,8 @@
 /*
  * remove user pages in a given range.
  */
-void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
+void do_zap_page_range(struct mm_struct *mm, unsigned long address,
+			      unsigned long size)
 {
 	mmu_gather_t *tlb;
 	pgd_t * dir;
@@ -633,6 +634,20 @@
 	iobuf->locked = 0;
 }
 
+void zap_page_range(struct mm_struct *mm, unsigned long address,
+		    unsigned long size, int actions)
+{
+	while (size) {
+		unsigned long chunk = size;
+
+		if ((actions & ZPR_PARTITION) && chunk > ZPR_MAX_BYTES)
+			chunk = ZPR_MAX_BYTES;
+		do_zap_page_range(mm, address, chunk);
+
+		address += chunk;
+		size -= chunk;
+	}
+}
 
 /*
  * Lock down all of the pages of a kiovec for IO.
@@ -742,11 +757,15 @@
 	return 0;
 }
 
-static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
-                                     unsigned long size, pgprot_t prot)
+static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
+				     unsigned long address, unsigned long size,
+				     pgprot_t prot)
 {
 	unsigned long end;
 
+	debug_lock_break(1);
+	break_spin_lock(&mm->page_table_lock);
+
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -774,7 +793,7 @@
 		pte_t * pte = pte_alloc(mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
-		zeromap_pte_range(pte, address, end - address, prot);
+		zeromap_pte_range(mm, pte, address, end - address, prot);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -1008,7 +1027,7 @@
 
 		/* mapping wholly truncated? */
 		if (mpnt->vm_pgoff >= pgoff) {
-			zap_page_range(mm, start, len);
+			zap_page_range(mm, start, len, ZPR_NORMAL);
 			continue;
 		}
 
@@ -1021,7 +1040,7 @@
 		/* Ok, partially affected.. */
 		start += diff << PAGE_SHIFT;
 		len = (len - diff) << PAGE_SHIFT;
-		zap_page_range(mm, start, len);
+		zap_page_range(mm, start, len, ZPR_NORMAL);
 	} while ((mpnt = mpnt->vm_next_share) != NULL);
 }
 
diff -urN linux-2.4.18-preempt/mm/mmap.c linux/mm/mmap.c
--- linux-2.4.18-preempt/mm/mmap.c	Mon Feb 25 17:29:15 2002
+++ linux/mm/mmap.c	Mon Feb 25 17:49:44 2002
@@ -569,7 +569,7 @@
 	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
-	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, ZPR_NORMAL);
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 	return error;
@@ -967,7 +967,7 @@
 		remove_shared_vm_struct(mpnt);
 		mm->map_count--;
 
-		zap_page_range(mm, st, size);
+		zap_page_range(mm, st, size, ZPR_PARTITION);
 
 		/*
 		 * Fix the mapping, and free the old area if it wasn't reused.
@@ -1127,7 +1127,7 @@
 		}
 		mm->map_count--;
 		remove_shared_vm_struct(mpnt);
-		zap_page_range(mm, start, size);
+		zap_page_range(mm, start, size, ZPR_PARTITION);
 		if (mpnt->vm_file)
 			fput(mpnt->vm_file);
 		kmem_cache_free(vm_area_cachep, mpnt);
diff -urN linux-2.4.18-preempt/mm/mremap.c linux/mm/mremap.c
--- linux-2.4.18-preempt/mm/mremap.c	Mon Feb 25 17:29:15 2002
+++ linux/mm/mremap.c	Mon Feb 25 17:49:44 2002
@@ -118,7 +118,7 @@
 	flush_cache_range(mm, new_addr, new_addr + len);
 	while ((offset += PAGE_SIZE) < len)
 		move_one_page(mm, new_addr + offset, old_addr + offset);
-	zap_page_range(mm, new_addr, len);
+	zap_page_range(mm, new_addr, len, ZPR_NORMAL);
 	return -1;
 }
 
diff -urN linux-2.4.18-preempt/mm/swapfile.c linux/mm/swapfile.c
--- linux-2.4.18-preempt/mm/swapfile.c	Mon Feb 25 17:29:15 2002
+++ linux/mm/swapfile.c	Mon Feb 25 17:49:44 2002
@@ -696,6 +696,7 @@
 		 * interactive performance.  Interruptible check on
 		 * signal_pending() would be nice, but changes the spec?
 		 */
+		debug_lock_break(551);
 		if (current->need_resched)
 			schedule();
 	}
@@ -1124,6 +1125,13 @@
 		if (swap_info[i].flags != SWP_USED)
 			continue;
 		for (j = 0; j < swap_info[i].max; ++j) {
+			if (conditional_schedule_needed()) {
+				debug_lock_break(551);
+				swap_list_unlock();
+				debug_lock_break(551);
+				unconditional_schedule();
+				swap_list_lock();
+			}
 			switch (swap_info[i].swap_map[j]) {
 				case 0:
 				case SWAP_MAP_BAD:
diff -urN linux-2.4.18-preempt/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.4.18-preempt/mm/vmscan.c	Mon Feb 25 17:29:15 2002
+++ linux/mm/vmscan.c	Mon Feb 25 17:49:44 2002
@@ -158,6 +158,8 @@
 	pte_t * pte;
 	unsigned long pmd_end;
 
+	DEFINE_LOCK_COUNT();
+
 	if (pmd_none(*dir))
 		return count;
 	if (pmd_bad(*dir)) {
@@ -182,6 +184,14 @@
 					address += PAGE_SIZE;
 					break;
 				}
+				/* we reach this with a lock depth of 1 or 2 */
+#if 0
+				if (TEST_LOCK_COUNT(4)) {
+					if (conditional_schedule_needed())
+						return count;
+					RESET_LOCK_COUNT();
+				}
+#endif
 			}
 		}
 		address += PAGE_SIZE;
@@ -215,6 +225,9 @@
 		count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone);
 		if (!count)
 			break;
+		/* lock depth can be 1 or 2 */
+		if (conditional_schedule_needed())
+			return count;
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -240,6 +253,9 @@
 		count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
 		if (!count)
 			break;
+		/* lock depth can be 1 or 2 */
+		if (conditional_schedule_needed())
+			return count;
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgdir++;
 	} while (address && (address < end));
@@ -262,6 +278,8 @@
 	 * and ptes.
 	 */
 	spin_lock(&mm->page_table_lock);
+
+continue_scan:
 	address = mm->swap_address;
 	if (address == TASK_SIZE || swap_mm != mm) {
 		/* We raced: don't count this mm but try again */
@@ -278,6 +296,13 @@
 			vma = vma->vm_next;
 			if (!vma)
 				break;
+			/* we reach this with a lock depth of 1 or 2 */
+#if 0
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&mm->page_table_lock);
+				goto continue_scan;
+			}
+#endif
 			if (!count)
 				goto out_unlock;
 			address = vma->vm_start;
@@ -299,6 +324,7 @@
 
 	counter = mmlist_nr;
 	do {
+		/* lock depth can be 0 or 1 */
 		if (unlikely(current->need_resched)) {
 			__set_current_state(TASK_RUNNING);
 			schedule();
@@ -344,6 +370,7 @@
 	while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
 		struct page * page;
 
+		/* lock depth is 1 or 2 */
 		if (unlikely(current->need_resched)) {
 			spin_unlock(&pagemap_lru_lock);
 			__set_current_state(TASK_RUNNING);
@@ -627,8 +654,11 @@
 
 	for (i = pgdat->nr_zones-1; i >= 0; i--) {
 		zone = pgdat->node_zones + i;
+		debug_lock_break(0);
+#ifndef CONFIG_PREEMPT
 		if (unlikely(current->need_resched))
 			schedule();
+#endif
 		if (!zone->need_balance)
 			continue;
 		if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
