diff -Nur linux-2.4.33-imedia/CREDITS linux-2.4.33-imedia-patching/CREDITS --- linux-2.4.33-imedia/CREDITS 2005-01-19 16:09:22.000000000 +0200 +++ linux-2.4.33-imedia-patching/CREDITS 2006-01-26 15:19:42.000000000 +0200 @@ -1007,8 +1007,8 @@ N: Nigel Gamble E: nigel@nrg.org -E: nigel@sgi.com D: Interrupt-driven printer driver +D: Preemptible kernel S: 120 Alley Way S: Mountain View, California 94040 S: USA diff -Nur linux-2.4.33-imedia/Documentation/Configure.help linux-2.4.33-imedia-patching/Documentation/Configure.help --- linux-2.4.33-imedia/Documentation/Configure.help 2006-01-11 20:29:27.000000000 +0200 +++ linux-2.4.33-imedia-patching/Documentation/Configure.help 2006-01-26 15:19:42.000000000 +0200 @@ -296,6 +296,17 @@ If you have a system with several CPUs, you do not need to say Y here: the local APIC will be used automatically. +Preemptible Kernel +CONFIG_PREEMPT + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. + + Say Y here if you are building a kernel for a desktop, embedded or + real-time system. Say N if you are unsure. + Kernel math emulation CONFIG_MATH_EMULATION Linux can emulate a math coprocessor (used for floating point diff -Nur linux-2.4.33-imedia/Documentation/preempt-locking.txt linux-2.4.33-imedia-patching/Documentation/preempt-locking.txt --- linux-2.4.33-imedia/Documentation/preempt-locking.txt 1970-01-01 02:00:00.000000000 +0200 +++ linux-2.4.33-imedia-patching/Documentation/preempt-locking.txt 2006-01-26 15:19:42.000000000 +0200 @@ -0,0 +1,104 @@ + Proper Locking Under a Preemptible Kernel: + Keeping Kernel Code Preempt-Safe + Robert Love + Last Updated: 22 Jan 2002 + + +INTRODUCTION + + +A preemptible kernel creates new locking issues. The issues are the same as +those under SMP: concurrency and reentrancy. Thankfully, the Linux preemptible +kernel model leverages existing SMP locking mechanisms. Thus, the kernel +requires explicit additional locking for very few additional situations. + +This document is for all kernel hackers. Developing code in the kernel +requires protecting these situations. + + +RULE #1: Per-CPU data structures need explicit protection + + +Two similar problems arise. An example code snippet: + + struct this_needs_locking tux[NR_CPUS]; + tux[smp_processor_id()] = some_value; + /* task is preempted here... */ + something = tux[smp_processor_id()]; + +First, since the data is per-CPU, it may not have explicit SMP locking, but +require it otherwise. Second, when a preempted task is finally rescheduled, +the previous value of smp_processor_id may not equal the current. You must +protect these situations by disabling preemption around them. + + +RULE #2: CPU state must be protected. + + +Under preemption, the state of the CPU must be protected. This is arch- +dependent, but includes CPU structures and state not preserved over a context +switch. For example, on x86, entering and exiting FPU mode is now a critical +section that must occur while preemption is disabled. Think what would happen +if the kernel is executing a floating-point instruction and is then preempted. +Remember, the kernel does not save FPU state except for user tasks. Therefore, +upon preemption, the FPU registers will be sold to the lowest bidder. Thus, +preemption must be disabled around such regions. 
+ +Note, some FPU functions are already explicitly preempt safe. For example, +kernel_fpu_begin and kernel_fpu_end will disable and enable preemption. +However, math_state_restore must be called with preemption disabled. + + +RULE #3: Lock acquire and release must be performed by same task + + +A lock acquired in one task must be released by the same task. This +means you can't do oddball things like acquire a lock and go off to +play while another task releases it. If you want to do something +like this, acquire and release the task in the same code path and +have the caller wait on an event by the other task. + + +SOLUTION + + +Data protection under preemption is achieved by disabling preemption for the +duration of the critical region. + +preempt_enable() decrement the preempt counter +preempt_disable() increment the preempt counter +preempt_enable_no_resched() decrement, but do not immediately preempt +preempt_get_count() return the preempt counter + +The functions are nestable. In other words, you can call preempt_disable +n-times in a code path, and preemption will not be reenabled until the n-th +call to preempt_enable. The preempt statements define to nothing if +preemption is not enabled. + +Note that you do not need to explicitly prevent preemption if you are holding +any locks or interrupts are disabled, since preemption is implicitly disabled +in those cases. + +Example: + + cpucache_t *cc; /* this is per-CPU */ + preempt_disable(); + cc = cc_data(searchp); + if (cc && cc->avail) { + __free_block(searchp, cc_entry(cc), cc->avail); + cc->avail = 0; + } + preempt_enable(); + return 0; + +Notice how the preemption statements must encompass every reference of the +critical variables. Another example: + + int buf[NR_CPUS]; + set_cpu_val(buf); + if (buf[smp_processor_id()] == -1) printf(KERN_INFO "wee!\n"); + spin_lock(&buf_lock); + /* ... */ + +This code is not preempt-safe, but see how easily we can fix it by simply +moving the spin_lock up two lines. diff -Nur linux-2.4.33-imedia/Documentation/sched-coding.txt linux-2.4.33-imedia-patching/Documentation/sched-coding.txt --- linux-2.4.33-imedia/Documentation/sched-coding.txt 1970-01-01 02:00:00.000000000 +0200 +++ linux-2.4.33-imedia-patching/Documentation/sched-coding.txt 2006-01-26 15:19:42.000000000 +0200 @@ -0,0 +1,126 @@ + Reference for various scheduler-related methods in the O(1) scheduler + Robert Love , MontaVista Software + + +Note most of these methods are local to kernel/sched.c - this is by design. +The scheduler is meant to be self-contained and abstracted away. This document +is primarily for understanding the scheduler, not interfacing to it. Some of +the discussed interfaces, however, are general process/scheduling methods. +They are typically defined in include/linux/sched.h. + + +Main Scheduling Methods +----------------------- + +void load_balance(runqueue_t *this_rq, int idle) + Attempts to pull tasks from one cpu to another to balance cpu usage, + if needed. This method is called explicitly if the runqueues are + inbalanced or periodically by the timer tick. Prior to calling, + the current runqueue must be locked and interrupts disabled. + +void schedule() + The main scheduling function. Upon return, the highest priority + process will be active. + + +Locking +------- + +Each runqueue has its own lock, rq->lock. When multiple runqueues need +to be locked, lock acquires must be ordered by ascending &runqueue value. 
+ +A specific runqueue is locked via + + task_rq_lock(task_t pid, unsigned long *flags) + +which disables preemption, disables interrupts, and locks the runqueue pid is +running on. Likewise, + + task_rq_unlock(task_t pid, unsigned long *flags) + +unlocks the runqueue pid is running on, restores interrupts to their previous +state, and reenables preemption. + +The routines + + double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) + +and + + double_rq_unlock(runqueue_t *rq1, runqueue_t rq2) + +safely lock and unlock, respectively, the two specified runqueues. They do +not, however, disable and restore interrupts. Users are required to do so +manually before and after calls. + + +Values +------ + +MAX_PRIO + The maximum priority of the system, stored in the task as task->prio. + Lower priorities are higher. Normal (non-RT) priorities range from + MAX_RT_PRIO to (MAX_PRIO - 1). +MAX_RT_PRIO + The maximum real-time priority of the system. Valid RT priorities + range from 0 to (MAX_RT_PRIO - 1). +MAX_USER_RT_PRIO + The maximum real-time priority that is exported to user-space. Should + always be equal to or less than MAX_RT_PRIO. Setting it less allows + kernel threads to have higher priorities than any user-space task. +MIN_TIMESLICE +MAX_TIMESLICE + Respectively, the minimum and maximum timeslices (quanta) of a process. + +Data +---- + +struct runqueue + The main per-CPU runqueue data structure. +struct task_struct + The main per-process data structure. + + +General Methods +--------------- + +cpu_rq(cpu) + Returns the runqueue of the specified cpu. +this_rq() + Returns the runqueue of the current cpu. +task_rq(pid) + Returns the runqueue which holds the specified pid. +cpu_curr(cpu) + Returns the task currently running on the given cpu. +rt_task(pid) + Returns true if pid is real-time, false if not. + + +Process Control Methods +----------------------- + +void set_user_nice(task_t *p, long nice) + Sets the "nice" value of task p to the given value. +int setscheduler(pid_t pid, int policy, struct sched_param *param) + Sets the scheduling policy and parameters for the given pid. +void set_cpus_allowed(task_t *p, unsigned long new_mask) + Sets a given task's CPU affinity and migrates it to a proper cpu. + Callers must have a valid reference to the task and assure the + task not exit prematurely. No locks can be held during the call. +set_task_state(tsk, state_value) + Sets the given task's state to the given value. +set_current_state(state_value) + Sets the current task's state to the given value. +void set_tsk_need_resched(struct task_struct *tsk) + Sets need_resched in the given task. +void clear_tsk_need_resched(struct task_struct *tsk) + Clears need_resched in the given task. +void set_need_resched() + Sets need_resched in the current task. +void clear_need_resched() + Clears need_resched in the current task. +int need_resched() + Returns true if need_resched is set in the current task, false + otherwise. +yield() + Place the current process at the end of the runqueue and call schedule. diff -Nur linux-2.4.33-imedia/Documentation/sched-design.txt linux-2.4.33-imedia-patching/Documentation/sched-design.txt --- linux-2.4.33-imedia/Documentation/sched-design.txt 1970-01-01 02:00:00.000000000 +0200 +++ linux-2.4.33-imedia-patching/Documentation/sched-design.txt 2006-01-26 15:19:42.000000000 +0200 @@ -0,0 +1,165 @@ + Goals, Design and Implementation of the + new ultra-scalable O(1) scheduler + + + This is an edited version of an email Ingo Molnar sent to + lkml on 4 Jan 2002. 
It describes the goals, design, and + implementation of Ingo's new ultra-scalable O(1) scheduler. + Last Updated: 18 April 2002. + + +Goal +==== + +The main goal of the new scheduler is to keep all the good things we know +and love about the current Linux scheduler: + + - good interactive performance even during high load: if the user + types or clicks then the system must react instantly and must execute + the user tasks smoothly, even during considerable background load. + + - good scheduling/wakeup performance with 1-2 runnable processes. + + - fairness: no process should stay without any timeslice for any + unreasonable amount of time. No process should get an unjustly high + amount of CPU time. + + - priorities: less important tasks can be started with lower priority, + more important tasks with higher priority. + + - SMP efficiency: no CPU should stay idle if there is work to do. + + - SMP affinity: processes which run on one CPU should stay affine to + that CPU. Processes should not bounce between CPUs too frequently. + + - plus additional scheduler features: RT scheduling, CPU binding. + +and the goal is also to add a few new things: + + - fully O(1) scheduling. Are you tired of the recalculation loop + blowing the L1 cache away every now and then? Do you think the goodness + loop is taking a bit too long to finish if there are lots of runnable + processes? This new scheduler takes no prisoners: wakeup(), schedule(), + the timer interrupt are all O(1) algorithms. There is no recalculation + loop. There is no goodness loop either. + + - 'perfect' SMP scalability. With the new scheduler there is no 'big' + runqueue_lock anymore - it's all per-CPU runqueues and locks - two + tasks on two separate CPUs can wake up, schedule and context-switch + completely in parallel, without any interlocking. All + scheduling-relevant data is structured for maximum scalability. + + - better SMP affinity. The old scheduler has a particular weakness that + causes the random bouncing of tasks between CPUs if/when higher + priority/interactive tasks, this was observed and reported by many + people. The reason is that the timeslice recalculation loop first needs + every currently running task to consume its timeslice. But when this + happens on eg. an 8-way system, then this property starves an + increasing number of CPUs from executing any process. Once the last + task that has a timeslice left has finished using up that timeslice, + the recalculation loop is triggered and other CPUs can start executing + tasks again - after having idled around for a number of timer ticks. + The more CPUs, the worse this effect. + + Furthermore, this same effect causes the bouncing effect as well: + whenever there is such a 'timeslice squeeze' of the global runqueue, + idle processors start executing tasks which are not affine to that CPU. + (because the affine tasks have finished off their timeslices already.) + + The new scheduler solves this problem by distributing timeslices on a + per-CPU basis, without having any global synchronization or + recalculation. + + - batch scheduling. A significant proportion of computing-intensive tasks + benefit from batch-scheduling, where timeslices are long and processes + are roundrobin scheduled. The new scheduler does such batch-scheduling + of the lowest priority tasks - so nice +19 jobs will get + 'batch-scheduled' automatically. With this scheduler, nice +19 jobs are + in essence SCHED_IDLE, from an interactiveness point of view. 
+ + - handle extreme loads more smoothly, without breakdown and scheduling + storms. + + - O(1) RT scheduling. For those RT folks who are paranoid about the + O(nr_running) property of the goodness loop and the recalculation loop. + + - run fork()ed children before the parent. Andrea has pointed out the + advantages of this a few months ago, but patches for this feature + do not work with the old scheduler as well as they should, + because idle processes often steal the new child before the fork()ing + CPU gets to execute it. + + +Design +====== + +the core of the new scheduler are the following mechanizms: + + - *two*, priority-ordered 'priority arrays' per CPU. There is an 'active' + array and an 'expired' array. The active array contains all tasks that + are affine to this CPU and have timeslices left. The expired array + contains all tasks which have used up their timeslices - but this array + is kept sorted as well. The active and expired array is not accessed + directly, it's accessed through two pointers in the per-CPU runqueue + structure. If all active tasks are used up then we 'switch' the two + pointers and from now on the ready-to-go (former-) expired array is the + active array - and the empty active array serves as the new collector + for expired tasks. + + - there is a 64-bit bitmap cache for array indices. Finding the highest + priority task is thus a matter of two x86 BSFL bit-search instructions. + +the split-array solution enables us to have an arbitrary number of active +and expired tasks, and the recalculation of timeslices can be done +immediately when the timeslice expires. Because the arrays are always +access through the pointers in the runqueue, switching the two arrays can +be done very quickly. + +this is a hybride priority-list approach coupled with roundrobin +scheduling and the array-switch method of distributing timeslices. + + - there is a per-task 'load estimator'. + +one of the toughest things to get right is good interactive feel during +heavy system load. While playing with various scheduler variants i found +that the best interactive feel is achieved not by 'boosting' interactive +tasks, but by 'punishing' tasks that want to use more CPU time than there +is available. This method is also much easier to do in an O(1) fashion. + +to establish the actual 'load' the task contributes to the system, a +complex-looking but pretty accurate method is used: there is a 4-entry +'history' ringbuffer of the task's activities during the last 4 seconds. +This ringbuffer is operated without much overhead. The entries tell the +scheduler a pretty accurate load-history of the task: has it used up more +CPU time or less during the past N seconds. [the size '4' and the interval +of 4x 1 seconds was found by lots of experimentation - this part is +flexible and can be changed in both directions.] + +the penalty a task gets for generating more load than the CPU can handle +is a priority decrease - there is a maximum amount to this penalty +relative to their static priority, so even fully CPU-bound tasks will +observe each other's priorities, and will share the CPU accordingly. + +the SMP load-balancer can be extended/switched with additional parallel +computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs +can be supported easily by changing the load-balancer. Right now it's +tuned for my SMP systems. + +i skipped the prev->mm == next->mm advantage - no workload i know of shows +any sensitivity to this. 
It can be added back by sacrificing O(1) +schedule() [the current and one-lower priority list can be searched for a +that->mm == current->mm condition], but costs a fair number of cycles +during a number of important workloads, so i wanted to avoid this as much +as possible. + +- the SMP idle-task startup code was still racy and the new scheduler +triggered this. So i streamlined the idle-setup code a bit. We do not call +into schedule() before all processors have started up fully and all idle +threads are in place. + +- the patch also cleans up a number of aspects of sched.c - moves code +into other areas of the kernel where it's appropriate, and simplifies +certain code paths and data constructs. As a result, the new scheduler's +code is smaller than the old one. + + Ingo diff -Nur linux-2.4.33-imedia/MAINTAINERS linux-2.4.33-imedia-patching/MAINTAINERS --- linux-2.4.33-imedia/MAINTAINERS 2005-11-16 21:12:54.000000000 +0200 +++ linux-2.4.33-imedia-patching/MAINTAINERS 2006-01-26 15:19:43.000000000 +0200 @@ -1535,6 +1535,14 @@ W: http://prism54.org S: Maintained +PREEMPTIBLE KERNEL +P: Robert M. Love +M: rml@tech9.net +L: linux-kernel@vger.kernel.org +L: kpreempt-tech@lists.sourceforge.net +W: http://tech9.net/rml/linux +S: Supported + PROMISE DC4030 CACHING DISK CONTROLLER DRIVER P: Peter Denison M: promise@pnd-pc.demon.co.uk @@ -1624,6 +1632,14 @@ L: linux-kernel@vger.kernel.org S: Maintained +SCHEDULER +P: Ingo Molnar +M: mingo@elte.hu +P: Robert Love +M: rml@tech9.net +L: linux-kernel@vger.kernel.org +S: Maintained + SC1200 WDT DRIVER P: Zwane Mwaikambo M: zwane@commfireservices.com diff -Nur linux-2.4.33-imedia/Makefile linux-2.4.33-imedia-patching/Makefile --- linux-2.4.33-imedia/Makefile 2006-01-11 20:29:28.000000000 +0200 +++ linux-2.4.33-imedia-patching/Makefile 2006-01-26 15:19:53.000000000 +0200 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 33 -EXTRAVERSION = -pre1 +EXTRAVERSION = -imedia KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -Nur linux-2.4.33-imedia/arch/alpha/kernel/entry.S linux-2.4.33-imedia-patching/arch/alpha/kernel/entry.S --- linux-2.4.33-imedia/arch/alpha/kernel/entry.S 2003-06-13 17:51:29.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/alpha/kernel/entry.S 2006-01-26 15:19:42.000000000 +0200 @@ -690,6 +690,7 @@ .end entSys .globl ret_from_fork +#if CONFIG_SMP .align 3 .ent ret_from_fork ret_from_fork: @@ -697,6 +698,9 @@ mov $17,$16 jsr $31,schedule_tail .end ret_from_fork +#else +ret_from_fork = ret_from_sys_call +#endif .align 3 .ent reschedule diff -Nur linux-2.4.33-imedia/arch/alpha/kernel/process.c linux-2.4.33-imedia-patching/arch/alpha/kernel/process.c --- linux-2.4.33-imedia/arch/alpha/kernel/process.c 2003-08-25 14:44:39.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/alpha/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -74,9 +75,6 @@ cpu_idle(void) { /* An endless idle loop with no priority at all. */ - current->nice = 20; - current->counter = -100; - while (1) { /* FIXME -- EV6 and LCA45 know how to power down the CPU. 
*/ @@ -186,6 +184,7 @@ args.mode = mode; args.restart_cmd = restart_cmd; #ifdef CONFIG_SMP + preempt_disable(); smp_call_function(common_shutdown_1, &args, 1, 0); #endif common_shutdown_1(&args); diff -Nur linux-2.4.33-imedia/arch/alpha/kernel/smp.c linux-2.4.33-imedia-patching/arch/alpha/kernel/smp.c --- linux-2.4.33-imedia/arch/alpha/kernel/smp.c 2003-06-13 17:51:29.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/alpha/kernel/smp.c 2006-01-26 15:19:42.000000000 +0200 @@ -81,6 +81,7 @@ int smp_num_probed; /* Internal processor count */ int smp_num_cpus = 1; /* Number that came online. */ int smp_threads_ready; /* True once the per process idle is forked. */ +unsigned long cache_decay_ticks; int __cpu_number_map[NR_CPUS]; int __cpu_logical_map[NR_CPUS]; @@ -155,11 +156,6 @@ { int cpuid = hard_smp_processor_id(); - if (current != init_tasks[cpu_number_map(cpuid)]) { - printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n", - cpuid, current, init_tasks[cpu_number_map(cpuid)]); - } - DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state)); /* Turn on machine checks. */ @@ -217,9 +213,6 @@ DBGS(("smp_callin: commencing CPU %d current %p\n", cpuid, current)); - /* Setup the scheduler for this processor. */ - init_idle(); - /* ??? This should be in init_idle. */ atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; @@ -449,14 +442,11 @@ if (idle == &init_task) panic("idle process is init_task for CPU %d", cpuid); - idle->processor = cpuid; - idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */ + init_idle(idle, cpuid); + unhash_process(idle); + __cpu_logical_map[cpunum] = cpuid; __cpu_number_map[cpuid] = cpunum; - - del_from_runqueue(idle); - unhash_process(idle); - init_tasks[cpunum] = idle; DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n", cpuid, idle->state, idle->flags)); @@ -563,13 +553,10 @@ __cpu_number_map[boot_cpuid] = 0; __cpu_logical_map[0] = boot_cpuid; - current->processor = boot_cpuid; smp_store_cpu_info(boot_cpuid); smp_setup_percpu_timer(boot_cpuid); - init_idle(); - /* ??? This should be in init_idle. 
*/ atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; diff -Nur linux-2.4.33-imedia/arch/arm/kernel/process.c linux-2.4.33-imedia-patching/arch/arm/kernel/process.c --- linux-2.4.33-imedia/arch/arm/kernel/process.c 2003-08-25 14:44:39.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/arm/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -87,8 +87,6 @@ { /* endless idle loop with no priority at all */ init_idle(); - current->nice = 20; - current->counter = -100; while (1) { void (*idle)(void) = pm_idle; diff -Nur linux-2.4.33-imedia/arch/cris/kernel/process.c linux-2.4.33-imedia-patching/arch/cris/kernel/process.c --- linux-2.4.33-imedia/arch/cris/kernel/process.c 2003-08-25 14:44:39.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/cris/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -163,8 +163,6 @@ { /* endless idle loop with no priority at all */ init_idle(); - current->nice = 20; - current->counter = -100; while(1) { void (*idle)(void) = pm_idle; diff -Nur linux-2.4.33-imedia/arch/i386/config.in linux-2.4.33-imedia-patching/arch/i386/config.in --- linux-2.4.33-imedia/arch/i386/config.in 2004-11-17 13:54:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/config.in 2006-01-26 15:19:42.000000000 +0200 @@ -25,6 +25,9 @@ mainmenu_option next_comment comment 'Processor type and features' +bool 'Low latency scheduling' CONFIG_LOLAT +dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT + choice 'Processor family' \ "386 CONFIG_M386 \ 486 CONFIG_M486 \ @@ -225,6 +228,7 @@ bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP +bool 'Preemptible Kernel' CONFIG_PREEMPT if [ "$CONFIG_SMP" != "y" ]; then bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC @@ -258,9 +262,12 @@ fi fi -if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y +if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then + if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi fi + endmenu mainmenu_option next_comment diff -Nur linux-2.4.33-imedia/arch/i386/defconfig linux-2.4.33-imedia-patching/arch/i386/defconfig --- linux-2.4.33-imedia/arch/i386/defconfig 2005-01-19 16:09:25.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/defconfig 2006-01-26 15:19:42.000000000 +0200 @@ -64,7 +64,9 @@ # CONFIG_HIGHMEM is not set # CONFIG_MATH_EMULATION is not set # CONFIG_MTRR is not set -CONFIG_SMP=y +# CONFIG_SMP is not set +CONFIG_PREEMPT=y +CONFIG_LOWLAT=y CONFIG_NR_CPUS=32 # CONFIG_X86_NUMA is not set # CONFIG_X86_TSC_DISABLE is not set diff -Nur linux-2.4.33-imedia/arch/i386/kernel/cpuid.c linux-2.4.33-imedia-patching/arch/i386/kernel/cpuid.c --- linux-2.4.33-imedia/arch/i386/kernel/cpuid.c 2001-10-11 19:04:57.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/cpuid.c 2006-01-26 15:19:42.000000000 +0200 @@ -60,7 +60,8 @@ static inline void do_cpuid(int cpu, u32 reg, u32 *data) { struct cpuid_command cmd; - + + preempt_disable(); if ( cpu == smp_processor_id() ) { cpuid(reg, &data[0], &data[1], &data[2], &data[3]); } else { @@ -70,6 +71,7 @@ smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); } + preempt_enable(); } #else /* ! 
CONFIG_SMP */ diff -Nur linux-2.4.33-imedia/arch/i386/kernel/entry.S linux-2.4.33-imedia-patching/arch/i386/kernel/entry.S --- linux-2.4.33-imedia/arch/i386/kernel/entry.S 2003-06-13 17:51:29.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/entry.S 2006-01-26 15:19:42.000000000 +0200 @@ -73,16 +73,36 @@ * these are offsets into the task-struct. */ state = 0 -flags = 4 +preempt_count = 4 sigpending = 8 addr_limit = 12 exec_domain = 16 need_resched = 20 tsk_ptrace = 24 -processor = 52 +cpu = 32 + +/* These are offsets into the irq_stat structure + * There is one per cpu and it is aligned to 32 + * byte boundry (we put that here as a shift count) + */ +irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT + +irq_stat_local_irq_count = 4 +irq_stat_local_bh_count = 8 ENOSYS = 38 +#ifdef CONFIG_SMP +#define GET_CPU_INDX movl cpu(%ebx),%eax; \ + shll $irq_array_shift,%eax +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx); \ + GET_CPU_INDX +#define CPU_INDX (,%eax) +#else +#define GET_CPU_INDX +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx) +#define CPU_INDX +#endif #define SAVE_ALL \ cld; \ @@ -184,9 +204,11 @@ ENTRY(ret_from_fork) +#if CONFIG_SMP pushl %ebx call SYMBOL_NAME(schedule_tail) addl $4, %esp +#endif GET_CURRENT(%ebx) testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS jne tracesys_exit @@ -255,16 +277,43 @@ ALIGN ENTRY(ret_from_intr) GET_CURRENT(%ebx) +#ifdef CONFIG_PREEMPT + cli + decl preempt_count(%ebx) +#endif ret_from_exception: movl EFLAGS(%esp),%eax # mix EFLAGS and CS movb CS(%esp),%al testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? jne ret_from_sys_call +#ifdef CONFIG_PREEMPT + cmpl $0,preempt_count(%ebx) + jnz restore_all + cmpl $0,need_resched(%ebx) + jz restore_all + movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx + addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx + jnz restore_all + incl preempt_count(%ebx) + sti + call SYMBOL_NAME(preempt_schedule) + jmp ret_from_intr +#else jmp restore_all +#endif ALIGN reschedule: - call SYMBOL_NAME(schedule) # test + movl EFLAGS(%esp),%eax # mix EFLAGS and CS + movb CS(%esp),%al + testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? 
+ jne userspace_resched + + call SYMBOL_NAME(schedule) + jmp ret_from_sys_call + +userspace_resched: + call SYMBOL_NAME(schedule_userspace) jmp ret_from_sys_call ENTRY(divide_error) @@ -297,6 +346,9 @@ GET_CURRENT(%ebx) call *%edi addl $8,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(coprocessor_error) @@ -316,12 +368,18 @@ movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) jne device_not_available_emulate +#ifdef CONFIG_PREEMPT + cli +#endif call SYMBOL_NAME(math_state_restore) jmp ret_from_exception device_not_available_emulate: pushl $0 # temporary storage for ORIG_EIP call SYMBOL_NAME(math_emulate) addl $4,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(debug) @@ -645,8 +703,8 @@ .long SYMBOL_NAME(sys_tkill) .long SYMBOL_NAME(sys_sendfile64) .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ + .long SYMBOL_NAME(sys_sched_setaffinity) + .long SYMBOL_NAME(sys_sched_getaffinity) .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */ .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */ diff -Nur linux-2.4.33-imedia/arch/i386/kernel/i387.c linux-2.4.33-imedia-patching/arch/i386/kernel/i387.c --- linux-2.4.33-imedia/arch/i386/kernel/i387.c 2005-04-04 04:42:19.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/i387.c 2006-01-26 15:19:42.000000000 +0200 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,8 @@ { struct task_struct *tsk = current; + preempt_disable(); + if (tsk->flags & PF_USEDFPU) { __save_init_fpu(tsk); return; diff -Nur linux-2.4.33-imedia/arch/i386/kernel/ioport.c linux-2.4.33-imedia-patching/arch/i386/kernel/ioport.c --- linux-2.4.33-imedia/arch/i386/kernel/ioport.c 2003-06-13 17:51:29.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/ioport.c 2006-01-26 15:19:42.000000000 +0200 @@ -55,7 +55,7 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct * t = ¤t->thread; - struct tss_struct * tss = init_tss + smp_processor_id(); + struct tss_struct * tss; if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32)) return -EINVAL; @@ -66,6 +66,8 @@ * IO bitmap up. ioperm() is much less timing critical than clone(), * this is why we delay this operation until now: */ + preempt_disable(); + tss = init_tss + smp_processor_id(); if (!t->ioperm) { /* * just in case ... 
@@ -84,6 +86,7 @@ memcpy(tss->io_bitmap, t->io_bitmap, IO_BITMAP_BYTES); tss->bitmap = IO_BITMAP_OFFSET; /* Activate it in the TSS */ } + preempt_enable(); return 0; } diff -Nur linux-2.4.33-imedia/arch/i386/kernel/irq.c linux-2.4.33-imedia-patching/arch/i386/kernel/irq.c --- linux-2.4.33-imedia/arch/i386/kernel/irq.c 2003-11-28 20:26:19.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/irq.c 2006-01-26 15:19:42.000000000 +0200 @@ -284,9 +284,11 @@ show("wait_on_irq"); count = ~0; } + preempt_disable(); __sti(); SYNC_OTHER_CORES(cpu); __cli(); + preempt_enable_no_resched(); if (irqs_running()) continue; if (global_irq_lock) @@ -360,8 +362,9 @@ __save_flags(flags); if (flags & (1 << EFLAGS_IF_SHIFT)) { - int cpu = smp_processor_id(); + int cpu; __cli(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) get_irqlock(cpu); } @@ -369,11 +372,14 @@ void __global_sti(void) { - int cpu = smp_processor_id(); + int cpu; + preempt_disable(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) release_irqlock(cpu); __sti(); + preempt_enable(); } /* @@ -388,13 +394,15 @@ int retval; int local_enabled; unsigned long flags; - int cpu = smp_processor_id(); + int cpu; __save_flags(flags); local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; /* default to local */ retval = 2 + local_enabled; + preempt_disable(); + cpu = smp_processor_id(); /* check for global flags if we're not in an interrupt */ if (!local_irq_count(cpu)) { if (local_enabled) @@ -402,6 +410,7 @@ if (global_irq_holder == cpu) retval = 0; } + preempt_enable(); return retval; } diff -Nur linux-2.4.33-imedia/arch/i386/kernel/ldt.c linux-2.4.33-imedia-patching/arch/i386/kernel/ldt.c --- linux-2.4.33-imedia/arch/i386/kernel/ldt.c 2004-02-18 15:36:30.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/ldt.c 2006-01-26 15:19:42.000000000 +0200 @@ -190,6 +190,7 @@ goto out; } + preempt_disable(); down(&mm->context.sem); if (ldt_info.entry_number >= mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); @@ -236,6 +237,7 @@ out_unlock: up(&mm->context.sem); + preempt_enable(); out: return error; } diff -Nur linux-2.4.33-imedia/arch/i386/kernel/microcode.c linux-2.4.33-imedia-patching/arch/i386/kernel/microcode.c --- linux-2.4.33-imedia/arch/i386/kernel/microcode.c 2005-01-19 16:09:25.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/microcode.c 2006-01-26 15:19:42.000000000 +0200 @@ -412,11 +412,14 @@ goto out_free; } + preempt_disable(); if (smp_call_function(do_update_one, NULL, 1, 1) != 0) { printk(KERN_ERR "microcode: Error! 
Could not run on all processors\n"); + preempt_enable(); error = -EIO; } do_update_one(NULL); + preempt_enable(); out_free: for (i = 0; i < smp_num_cpus; i++) { diff -Nur linux-2.4.33-imedia/arch/i386/kernel/msr.c linux-2.4.33-imedia-patching/arch/i386/kernel/msr.c --- linux-2.4.33-imedia/arch/i386/kernel/msr.c 2001-10-11 19:04:57.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/msr.c 2006-01-26 15:19:42.000000000 +0200 @@ -114,8 +114,9 @@ { struct msr_command cmd; + preempt_disable(); if ( cpu == smp_processor_id() ) { - return wrmsr_eio(reg, eax, edx); + cmd.err = wrmsr_eio(reg, eax, edx); } else { cmd.cpu = cpu; cmd.reg = reg; @@ -123,16 +124,19 @@ cmd.data[1] = edx; smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); - return cmd.err; } + + preempt_enable(); + return cmd.err; } static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) { struct msr_command cmd; + preempt_disable(); if ( cpu == smp_processor_id() ) { - return rdmsr_eio(reg, eax, edx); + cmd.err = rdmsr_eio(reg, eax, edx); } else { cmd.cpu = cpu; cmd.reg = reg; @@ -141,9 +145,10 @@ *eax = cmd.data[0]; *edx = cmd.data[1]; - - return cmd.err; } + + preempt_enable(); + return cmd.err; } #else /* ! CONFIG_SMP */ diff -Nur linux-2.4.33-imedia/arch/i386/kernel/mtrr.c linux-2.4.33-imedia-patching/arch/i386/kernel/mtrr.c --- linux-2.4.33-imedia/arch/i386/kernel/mtrr.c 2005-06-01 03:56:56.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/mtrr.c 2006-01-26 15:19:42.000000000 +0200 @@ -1065,6 +1065,9 @@ wait_barrier_execute = TRUE; wait_barrier_cache_enable = TRUE; atomic_set (&undone_count, smp_num_cpus - 1); + + preempt_disable(); + /* Start the ball rolling on other CPUs */ if (smp_call_function (ipi_handler, &data, 1, 0) != 0) panic ("mtrr: timed out waiting for other CPUs\n"); @@ -1090,6 +1093,9 @@ then enable the local cache and return */ wait_barrier_cache_enable = FALSE; set_mtrr_done (&ctxt); + + preempt_enable(); + } /* End Function set_mtrr_smp */ diff -Nur linux-2.4.33-imedia/arch/i386/kernel/process.c linux-2.4.33-imedia-patching/arch/i386/kernel/process.c --- linux-2.4.33-imedia/arch/i386/kernel/process.c 2005-11-16 21:12:54.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -126,15 +126,12 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ - init_idle(); - current->nice = 20; - current->counter = -100; while (1) { void (*idle)(void) = pm_idle; if (!idle) idle = default_idle; - while (!current->need_resched) + if (!current->need_resched) idle(); schedule(); check_pgt_cache(); @@ -665,15 +662,17 @@ asm volatile("mov %%gs,%0":"=m" (prev->gs)); /* - * Restore %fs and %gs. + * Restore %fs and %gs if needed. */ - loadsegment(fs, next->fs); - loadsegment(gs, next->gs); + if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) { + loadsegment(fs, next->fs); + loadsegment(gs, next->gs); + } /* * Now maybe reload the debug registers */ - if (next->debugreg[7]){ + if (unlikely(next->debugreg[7])) { loaddebug(next, 0); loaddebug(next, 1); loaddebug(next, 2); @@ -683,7 +682,7 @@ loaddebug(next, 7); } - if (prev->ioperm || next->ioperm) { + if (unlikely(prev->ioperm || next->ioperm)) { if (next->ioperm) { /* * 4 cachelines copy ... 
not good, but not that diff -Nur linux-2.4.33-imedia/arch/i386/kernel/setup.c linux-2.4.33-imedia-patching/arch/i386/kernel/setup.c --- linux-2.4.33-imedia/arch/i386/kernel/setup.c 2005-04-04 04:42:19.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/setup.c 2006-01-26 15:19:42.000000000 +0200 @@ -3224,9 +3224,10 @@ load_TR(nr); load_LDT(&init_mm.context); - /* - * Clear all 6 debug registers: - */ + /* Clear %fs and %gs. */ + asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + + /* Clear all 6 debug registers: */ #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); diff -Nur linux-2.4.33-imedia/arch/i386/kernel/smp.c linux-2.4.33-imedia-patching/arch/i386/kernel/smp.c --- linux-2.4.33-imedia/arch/i386/kernel/smp.c 2004-11-17 13:54:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/smp.c 2006-01-26 15:19:42.000000000 +0200 @@ -360,10 +360,14 @@ asmlinkage void smp_invalidate_interrupt (void) { - unsigned long cpu = smp_processor_id(); + unsigned long cpu; + + preempt_disable(); + + cpu = smp_processor_id(); if (!test_bit(cpu, &flush_cpumask)) - return; + goto out; /* * This was a BUG() but until someone can quote me the * line from the intel manual that guarantees an IPI to @@ -384,6 +388,8 @@ } ack_APIC_irq(); clear_bit(cpu, &flush_cpumask); +out: + preempt_enable(); } static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm, @@ -427,23 +433,28 @@ flush_mm = NULL; flush_va = 0; - spin_unlock(&tlbstate_lock); + _raw_spin_unlock(&tlbstate_lock); } void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + unsigned long cpu_mask; + preempt_disable(); + cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id()); local_flush_tlb(); if (cpu_mask) flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + preempt_enable(); } void flush_tlb_mm (struct mm_struct * mm) { - unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + unsigned long cpu_mask; + preempt_disable(); + cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id()); if (current->active_mm == mm) { if (current->mm) local_flush_tlb(); @@ -452,13 +463,16 @@ } if (cpu_mask) flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + preempt_enable(); } void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + unsigned long cpu_mask; + preempt_disable(); + cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id()); if (current->active_mm == mm) { if(current->mm) __flush_tlb_one(va); @@ -468,6 +482,7 @@ if (cpu_mask) flush_tlb_others(cpu_mask, mm, va); + preempt_enable(); } static inline void do_flush_tlb_all_local(void) @@ -486,9 +501,11 @@ void flush_tlb_all(void) { + preempt_disable(); smp_call_function (flush_tlb_all_ipi,0,1,1); do_flush_tlb_all_local(); + preempt_enable(); } /* @@ -503,6 +520,17 @@ } /* + * this function sends a reschedule IPI to all (other) CPUs. + * This should only be used if some 'global' task became runnable, + * such as a RT task, that must be handled now. The first CPU + * that manages to grab the task will run it. + */ +void fastcall smp_send_reschedule_all(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. 
*/ @@ -564,7 +592,7 @@ if (wait) while (atomic_read(&data.finished) != cpus) barrier(); - spin_unlock(&call_lock); + _raw_spin_unlock(&call_lock); return 0; } @@ -572,7 +600,7 @@ static void stop_this_cpu (void * dummy) { /* - * Remove this CPU: + * Remove this CPU: assumes preemption is disabled */ clear_bit(smp_processor_id(), &cpu_online_map); __cli(); diff -Nur linux-2.4.33-imedia/arch/i386/kernel/smpboot.c linux-2.4.33-imedia-patching/arch/i386/kernel/smpboot.c --- linux-2.4.33-imedia/arch/i386/kernel/smpboot.c 2004-04-14 16:05:25.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/smpboot.c 2006-01-26 15:19:42.000000000 +0200 @@ -353,7 +353,7 @@ * (This works even if the APIC is not enabled.) */ phys_id = GET_APIC_ID(apic_read(APIC_ID)); - cpuid = current->processor; + cpuid = cpu(); if (test_and_set_bit(cpuid, &cpu_online_map)) { printk("huh, phys CPU#%d, CPU#%d already present??\n", phys_id, cpuid); @@ -423,6 +423,7 @@ */ smp_store_cpu_info(cpuid); + disable_APIC_timer(); /* * Allow the master to continue. */ @@ -453,6 +454,7 @@ smp_callin(); while (!atomic_read(&smp_commenced)) rep_nop(); + enable_APIC_timer(); /* * low-memory mappings have been cleared, flush them from * the local TLBs too. @@ -791,16 +793,13 @@ if (!idle) panic("No idle process for CPU %d", cpu); - idle->processor = cpu; - idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */ + init_idle(idle, cpu); map_cpu_to_boot_apicid(cpu, apicid); idle->thread.eip = (unsigned long) start_secondary; - del_from_runqueue(idle); unhash_process(idle); - init_tasks[cpu] = idle; /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); @@ -913,6 +912,7 @@ } cycles_t cacheflush_time; +unsigned long cache_decay_ticks; static void smp_tune_scheduling (void) { @@ -946,9 +946,13 @@ cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth; } + cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000; + printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", (long)cacheflush_time/(cpu_khz/1000), ((long)cacheflush_time*100/(cpu_khz/1000)) % 100); + printk("task migration cache decay timeout: %ld msecs.\n", + (cache_decay_ticks + 1) * 1000 / HZ); } /* @@ -1014,8 +1018,7 @@ map_cpu_to_boot_apicid(0, boot_cpu_apicid); global_irq_holder = 0; - current->processor = 0; - init_idle(); + current->cpu = 0; smp_tune_scheduling(); /* diff -Nur linux-2.4.33-imedia/arch/i386/kernel/traps.c linux-2.4.33-imedia-patching/arch/i386/kernel/traps.c --- linux-2.4.33-imedia/arch/i386/kernel/traps.c 2005-11-16 21:12:54.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/i386/kernel/traps.c 2006-01-26 15:19:42.000000000 +0200 @@ -750,6 +750,8 @@ * * Careful.. There are problems with IBM-designed IRQ13 behaviour. * Don't touch unless you *really* know how it works. + * + * Must be called with kernel preemption disabled. 
*/ asmlinkage void math_state_restore(struct pt_regs regs) { diff -Nur linux-2.4.33-imedia/arch/i386/lib/dec_and_lock.c linux-2.4.33-imedia-patching/arch/i386/lib/dec_and_lock.c --- linux-2.4.33-imedia/arch/i386/lib/dec_and_lock.c 2000-07-08 04:20:16.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/lib/dec_and_lock.c 2006-01-26 15:19:42.000000000 +0200 @@ -8,6 +8,7 @@ */ #include +#include #include int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) diff -Nur linux-2.4.33-imedia/arch/i386/mm/init.c linux-2.4.33-imedia-patching/arch/i386/mm/init.c --- linux-2.4.33-imedia/arch/i386/mm/init.c 2004-04-14 16:05:25.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/i386/mm/init.c 2006-01-26 15:19:42.000000000 +0200 @@ -46,6 +46,7 @@ int do_check_pgt_cache(int low, int high) { int freed = 0; + preempt_disable(); if(pgtable_cache_size > high) { do { if (pgd_quicklist) { @@ -62,6 +63,7 @@ } } while(pgtable_cache_size > low); } + preempt_enable(); return freed; } diff -Nur linux-2.4.33-imedia/arch/ia64/kernel/entry.S linux-2.4.33-imedia-patching/arch/ia64/kernel/entry.S --- linux-2.4.33-imedia/arch/ia64/kernel/entry.S 2005-04-04 04:42:19.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ia64/kernel/entry.S 2006-01-26 15:19:42.000000000 +0200 @@ -175,7 +175,7 @@ mov r8=r13 // return pointer to previously running task mov r13=in0 // set "current" pointer ;; - ssm psr.i // renable psr.i AFTER the ic bit is serialized + ssm psr.i DO_LOAD_SWITCH_STACK #ifdef CONFIG_SMP diff -Nur linux-2.4.33-imedia/arch/ia64/kernel/process.c linux-2.4.33-imedia-patching/arch/ia64/kernel/process.c --- linux-2.4.33-imedia/arch/ia64/kernel/process.c 2005-04-04 04:42:19.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ia64/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -147,9 +147,6 @@ cpu_idle (void *unused) { init_idle(); - current->nice = 20; - current->counter = -100; - /* endless idle loop with no priority at all */ while (1) { diff -Nur linux-2.4.33-imedia/arch/m68k/kernel/process.c linux-2.4.33-imedia-patching/arch/m68k/kernel/process.c --- linux-2.4.33-imedia/arch/m68k/kernel/process.c 2003-06-13 17:51:31.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/m68k/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -80,8 +80,6 @@ { /* endless idle loop with no priority at all */ init_idle(); - current->nice = 20; - current->counter = -100; idle(); } diff -Nur linux-2.4.33-imedia/arch/mips/config-shared.in linux-2.4.33-imedia-patching/arch/mips/config-shared.in --- linux-2.4.33-imedia/arch/mips/config-shared.in 2005-01-19 16:09:27.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/mips/config-shared.in 2006-01-26 15:19:42.000000000 +0200 @@ -838,6 +838,7 @@ define_bool CONFIG_HOTPLUG_PCI n fi +dep_bool 'Preemptible Kernel' CONFIG_PREEMPT $CONFIG_NEW_IRQ bool 'System V IPC' CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL diff -Nur linux-2.4.33-imedia/arch/mips/kernel/i8259.c linux-2.4.33-imedia-patching/arch/mips/kernel/i8259.c --- linux-2.4.33-imedia/arch/mips/kernel/i8259.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/mips/kernel/i8259.c 2006-01-26 15:19:42.000000000 +0200 @@ -8,6 +8,7 @@ * Copyright (C) 1992 Linus Torvalds * Copyright (C) 1994 - 2000 Ralf Baechle */ +#include #include #include #include diff -Nur linux-2.4.33-imedia/arch/mips/kernel/irq.c linux-2.4.33-imedia-patching/arch/mips/kernel/irq.c --- linux-2.4.33-imedia/arch/mips/kernel/irq.c 2004-02-18 15:36:30.000000000 +0200 
+++ linux-2.4.33-imedia-patching/arch/mips/kernel/irq.c 2006-01-26 15:19:42.000000000 +0200 @@ -8,6 +8,8 @@ * Copyright (C) 1992 Linus Torvalds * Copyright (C) 1994 - 2000 Ralf Baechle */ + +#include #include #include #include @@ -19,11 +21,13 @@ #include #include #include -#include +#include +#include #include #include #include +#include /* * Controller mappings for all interrupt sources: @@ -429,6 +433,8 @@ struct irqaction * action; unsigned int status; + preempt_disable(); + kstat.irqs[cpu][irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); @@ -490,6 +496,27 @@ if (softirq_pending(cpu)) do_softirq(); + +#if defined(CONFIG_PREEMPT) + while (--current->preempt_count == 0) { + db_assert(intr_off()); + db_assert(!in_interrupt()); + + if (current->need_resched == 0) { + break; + } + + current->preempt_count ++; + sti(); + if (user_mode(regs)) { + schedule(); + } else { + preempt_schedule(); + } + cli(); + } +#endif + return 1; } diff -Nur linux-2.4.33-imedia/arch/mips/kernel/process.c linux-2.4.33-imedia-patching/arch/mips/kernel/process.c --- linux-2.4.33-imedia/arch/mips/kernel/process.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/mips/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -38,8 +38,6 @@ ATTRIB_NORET void cpu_idle(void) { /* endless idle loop with no priority at all */ - current->nice = 20; - current->counter = -100; init_idle(); while (1) { diff -Nur linux-2.4.33-imedia/arch/mips/mm/extable.c linux-2.4.33-imedia-patching/arch/mips/mm/extable.c --- linux-2.4.33-imedia/arch/mips/mm/extable.c 2002-11-29 01:53:10.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/mips/mm/extable.c 2006-01-26 15:19:42.000000000 +0200 @@ -3,6 +3,7 @@ */ #include #include +#include #include #include diff -Nur linux-2.4.33-imedia/arch/mips64/kernel/process.c linux-2.4.33-imedia-patching/arch/mips64/kernel/process.c --- linux-2.4.33-imedia/arch/mips64/kernel/process.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/mips64/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -39,8 +39,7 @@ { /* endless idle loop with no priority at all */ init_idle(); - current->nice = 20; - current->counter = -100; + while (1) { while (!current->need_resched) if (cpu_wait) diff -Nur linux-2.4.33-imedia/arch/parisc/kernel/process.c linux-2.4.33-imedia-patching/arch/parisc/kernel/process.c --- linux-2.4.33-imedia/arch/parisc/kernel/process.c 2003-06-13 17:51:31.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/parisc/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -65,8 +65,6 @@ { /* endless idle loop with no priority at all */ init_idle(); - current->nice = 20; - current->counter = -100; while (1) { while (!current->need_resched) { diff -Nur linux-2.4.33-imedia/arch/ppc/8xx_io/uart.c linux-2.4.33-imedia-patching/arch/ppc/8xx_io/uart.c --- linux-2.4.33-imedia/arch/ppc/8xx_io/uart.c 2005-01-19 16:09:35.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/ppc/8xx_io/uart.c 2006-01-26 15:19:42.000000000 +0200 @@ -1849,7 +1849,6 @@ printk("lsr = %d (jiff=%lu)...", lsr, jiffies); #endif current->state = TASK_INTERRUPTIBLE; -/* current->counter = 0; make us low-priority */ schedule_timeout(char_time); if (signal_pending(current)) break; diff -Nur linux-2.4.33-imedia/arch/ppc/config.in linux-2.4.33-imedia-patching/arch/ppc/config.in --- linux-2.4.33-imedia/arch/ppc/config.in 2004-08-08 02:26:04.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ppc/config.in 2006-01-26 15:19:42.000000000 +0200 @@ -167,6 +167,8 @@ int 'Maximum number of 
CPUs (2-32)' CONFIG_NR_CPUS 32 fi +bool 'Preemptible kernel support' CONFIG_PREEMPT + if [ "$CONFIG_6xx" = "y" -a "$CONFIG_8260" = "n" ];then bool 'AltiVec Support' CONFIG_ALTIVEC bool 'Thermal Management Support' CONFIG_TAU diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/entry.S linux-2.4.33-imedia-patching/arch/ppc/kernel/entry.S --- linux-2.4.33-imedia/arch/ppc/kernel/entry.S 2004-04-14 16:05:27.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/entry.S 2006-01-26 15:19:42.000000000 +0200 @@ -273,7 +273,9 @@ .globl ret_from_fork ret_from_fork: +#ifdef CONFIG_SMP bl schedule_tail +#endif lwz r0,TASK_PTRACE(r2) andi. r0,r0,PT_TRACESYS bnel- syscall_trace @@ -287,6 +289,46 @@ */ cmpi 0,r3,0 beq restore +#ifdef CONFIG_PREEMPT + lwz r3,PREEMPT_COUNT(r2) + cmpi 0,r3,1 + bge ret_from_except + lwz r5,_MSR(r1) + andi. r5,r5,MSR_PR + bne ret_from_except + lwz r5,NEED_RESCHED(r2) + cmpi 0,r5,0 + beq ret_from_except + lis r3,irq_stat@h + ori r3,r3,irq_stat@l +#ifdef CONFIG_SMP + lwz r5,CPU(r2) + rlwinm r5,r5,5,0,26 + add r3,r3,r5 +#endif + lwz r5,4(r3) + lwz r3,8(r3) + add r3,r3,r5 + cmpi 0,r3,0 + bne ret_from_except + lwz r3,PREEMPT_COUNT(r2) + addi r3,r3,1 + stw r3,PREEMPT_COUNT(r2) + mfmsr r0 + ori r0,r0,MSR_EE + mtmsr r0 + sync + bl preempt_schedule + mfmsr r0 + rlwinm r0,r0,0,17,15 + mtmsr r0 + sync + lwz r3,PREEMPT_COUNT(r2) + subi r3,r3,1 + stw r3,PREEMPT_COUNT(r2) + li r3,1 + b ret_from_intercept +#endif /* CONFIG_PREEMPT */ .globl ret_from_except ret_from_except: lwz r3,_MSR(r1) /* Returning to user mode? */ diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/idle.c linux-2.4.33-imedia-patching/arch/ppc/kernel/idle.c --- linux-2.4.33-imedia/arch/ppc/kernel/idle.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/idle.c 2006-01-26 15:19:42.000000000 +0200 @@ -46,9 +46,7 @@ do_power_save = 1; /* endless loop with no priority at all */ - current->nice = 20; - current->counter = -100; - init_idle(); + init_idle(current, smp_processor_id()); for (;;) { #ifdef CONFIG_SMP if (!do_power_save) { @@ -64,13 +62,12 @@ } } #endif +#ifdef CONFIG_6xx if (do_power_save && !current->need_resched) +#endif power_save(); - - if (current->need_resched) { - schedule(); - check_pgt_cache(); - } + schedule(); + check_pgt_cache(); } return 0; } diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/irq.c linux-2.4.33-imedia-patching/arch/ppc/kernel/irq.c --- linux-2.4.33-imedia/arch/ppc/kernel/irq.c 2003-11-28 20:26:19.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/irq.c 2006-01-26 15:19:42.000000000 +0200 @@ -551,6 +551,34 @@ return 1; /* lets ret_from_int know we can do checks */ } +#ifdef CONFIG_PREEMPT +int +preempt_intercept(struct pt_regs *regs) +{ + int ret; + + preempt_disable(); + + switch(regs->trap) { + case 0x500: + ret = do_IRQ(regs); + break; +#if !defined(CONFIG_4xx) || defined(CONFIG_440) + case 0x900: +#else + case 0x1000: +#endif + ret = timer_interrupt(regs); + break; + default: + BUG(); + } + + preempt_enable(); + return ret; +} +#endif /* CONFIG_PREEMPT */ + unsigned long probe_irq_on (void) { return 0; @@ -647,11 +675,13 @@ show("wait_on_irq"); count = ~0; } + preempt_disable(); __sti(); /* don't worry about the lock race Linus found * on intel here. -- Cort */ __cli(); + preempt_enable_no_resched(); if (atomic_read(&global_irq_count)) continue; if (global_irq_lock) @@ -727,6 +757,8 @@ global_irq_holder = cpu; } +#define EFLAGS_IF_SHIFT 15 + /* * A global "cli()" while in an interrupt context * turns into just a local cli(). 
Interrupts @@ -744,9 +776,10 @@ unsigned long flags; __save_flags(flags); - if (flags & (1 << 15)) { - int cpu = smp_processor_id(); + if (flags & (1 << EFLAGS_IF_SHIFT)) { + int cpu; __cli(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) get_irqlock(cpu); } @@ -754,11 +787,14 @@ void __global_sti(void) { - int cpu = smp_processor_id(); + int cpu; + preempt_disable(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) release_irqlock(cpu); __sti(); + preempt_enable(); } /* @@ -773,19 +809,23 @@ int retval; int local_enabled; unsigned long flags; + int cpu; __save_flags(flags); - local_enabled = (flags >> 15) & 1; + local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; /* default to local */ retval = 2 + local_enabled; /* check for global flags if we're not in an interrupt */ - if (!local_irq_count(smp_processor_id())) { + preempt_disable(); + cpu = smp_processor_id(); + if (!local_irq_count(cpu)) { if (local_enabled) retval = 1; - if (global_irq_holder == (unsigned char) smp_processor_id()) + if (global_irq_holder == cpu) retval = 0; } + preempt_enable(); return retval; } diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/mk_defs.c linux-2.4.33-imedia-patching/arch/ppc/kernel/mk_defs.c --- linux-2.4.33-imedia/arch/ppc/kernel/mk_defs.c 2003-11-28 20:26:19.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/mk_defs.c 2006-01-26 15:19:42.000000000 +0200 @@ -34,11 +34,13 @@ /*DEFINE(KERNELBASE, KERNELBASE);*/ DEFINE(STATE, offsetof(struct task_struct, state)); DEFINE(NEXT_TASK, offsetof(struct task_struct, next_task)); - DEFINE(COUNTER, offsetof(struct task_struct, counter)); - DEFINE(PROCESSOR, offsetof(struct task_struct, processor)); + DEFINE(CPU, offsetof(struct task_struct, cpu)); DEFINE(SIGPENDING, offsetof(struct task_struct, sigpending)); DEFINE(THREAD, offsetof(struct task_struct, thread)); DEFINE(MM, offsetof(struct task_struct, mm)); +#ifdef CONFIG_PREEMPT + DEFINE(PREEMPT_COUNT, offsetof(struct task_struct, preempt_count)); +#endif DEFINE(ACTIVE_MM, offsetof(struct task_struct, active_mm)); DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); DEFINE(KSP, offsetof(struct thread_struct, ksp)); diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/open_pic.c linux-2.4.33-imedia-patching/arch/ppc/kernel/open_pic.c --- linux-2.4.33-imedia/arch/ppc/kernel/open_pic.c 2004-02-18 15:36:30.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/open_pic.c 2006-01-26 15:19:42.000000000 +0200 @@ -601,19 +601,24 @@ void __init do_openpic_setup_cpu(void) { int i; - u32 msk = 1 << smp_hw_index[smp_processor_id()]; +#ifdef CONFIG_IRQ_ALL_CPUS + u32 msk; +#endif /* CONFIG_IRQ_ALL_CPUS */ spin_lock(&openpic_setup_lock); #ifdef CONFIG_IRQ_ALL_CPUS + msk = 1 << smp_hw_index[smp_processor_id()]; + /* let the openpic know we want intrs. default affinity * is 0xffffffff until changed via /proc * That's how it's done on x86. If we want it differently, then * we should make sure we also change the default values of irq_affinity * in irq.c. 
*/ - for (i = 0; i < NumSources; i++) + for (i = 0; i < NumSources; i++) { openpic_mapirq(i, msk, ~0U); + } #endif /* CONFIG_IRQ_ALL_CPUS */ openpic_set_priority(0); diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/setup.c linux-2.4.33-imedia-patching/arch/ppc/kernel/setup.c --- linux-2.4.33-imedia/arch/ppc/kernel/setup.c 2004-04-14 16:05:27.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/setup.c 2006-01-26 15:19:42.000000000 +0200 @@ -502,6 +502,20 @@ strcpy(cmd_line, CONFIG_CMDLINE); #endif /* CONFIG_CMDLINE */ +#ifdef CONFIG_PREEMPT + /* Override the irq routines for external & timer interrupts here, + * as the MMU has only been minimally setup at this point and + * there are no protections on page zero. + */ + { + extern int preempt_intercept(struct pt_regs *); + + do_IRQ_intercept = (unsigned long) &preempt_intercept; + timer_interrupt_intercept = (unsigned long) &preempt_intercept; + + } +#endif /* CONFIG_PREEMPT */ + platform_init(r3, r4, r5, r6, r7); if (ppc_md.progress) diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/smp.c linux-2.4.33-imedia-patching/arch/ppc/kernel/smp.c --- linux-2.4.33-imedia/arch/ppc/kernel/smp.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/smp.c 2006-01-26 15:19:42.000000000 +0200 @@ -294,8 +294,6 @@ cpu_callin_map[0] = 1; current->processor = 0; - init_idle(); - for (i = 0; i < NR_CPUS; i++) { prof_counter[i] = 1; prof_multiplier[i] = 1; @@ -351,7 +349,8 @@ p = init_task.prev_task; if (!p) panic("No idle task for CPU %d", i); - del_from_runqueue(p); + init_idle(p, i); + unhash_process(p); init_tasks[i] = p; diff -Nur linux-2.4.33-imedia/arch/ppc/kernel/temp.c linux-2.4.33-imedia-patching/arch/ppc/kernel/temp.c --- linux-2.4.33-imedia/arch/ppc/kernel/temp.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ppc/kernel/temp.c 2006-01-26 15:19:42.000000000 +0200 @@ -138,7 +138,7 @@ static void tau_timeout(void * info) { - unsigned long cpu = smp_processor_id(); + unsigned long cpu; unsigned long flags; int size; int shrink; @@ -146,6 +146,8 @@ /* disabling interrupts *should* be okay */ save_flags(flags); cli(); + cpu = smp_processor_id(); + #ifndef CONFIG_TAU_INT TAUupdate(cpu); #endif @@ -191,13 +193,15 @@ static void tau_timeout_smp(unsigned long unused) { - /* schedule ourselves to be run again */ mod_timer(&tau_timer, jiffies + shrink_timer) ; + + preempt_disable(); #ifdef CONFIG_SMP smp_call_function(tau_timeout, NULL, 1, 0); #endif tau_timeout(NULL); + preempt_enable(); } /* diff -Nur linux-2.4.33-imedia/arch/ppc/lib/dec_and_lock.c linux-2.4.33-imedia-patching/arch/ppc/lib/dec_and_lock.c --- linux-2.4.33-imedia/arch/ppc/lib/dec_and_lock.c 2001-11-16 20:10:08.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/ppc/lib/dec_and_lock.c 2006-01-26 15:19:42.000000000 +0200 @@ -1,4 +1,5 @@ #include +#include #include #include #include diff -Nur linux-2.4.33-imedia/arch/ppc/mm/init.c linux-2.4.33-imedia-patching/arch/ppc/mm/init.c --- linux-2.4.33-imedia/arch/ppc/mm/init.c 2003-11-28 20:26:19.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/ppc/mm/init.c 2006-01-26 15:19:42.000000000 +0200 @@ -126,6 +126,9 @@ int do_check_pgt_cache(int low, int high) { int freed = 0; + + preempt_disable(); + if (pgtable_cache_size > high) { do { if (pgd_quicklist) { @@ -138,6 +141,9 @@ } } while (pgtable_cache_size > low); } + + preempt_enable(); + return freed; } diff -Nur linux-2.4.33-imedia/arch/ppc/mm/tlb.c linux-2.4.33-imedia-patching/arch/ppc/mm/tlb.c --- 
linux-2.4.33-imedia/arch/ppc/mm/tlb.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/ppc/mm/tlb.c 2006-01-26 15:19:42.000000000 +0200 @@ -58,11 +58,14 @@ * we can and should dispense with flush_tlb_all(). * -- paulus. */ + + preempt_disable(); local_flush_tlb_range(&init_mm, TASK_SIZE, ~0UL); #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif /* CONFIG_SMP */ + preempt_enable(); } /* @@ -73,8 +76,10 @@ void local_flush_tlb_mm(struct mm_struct *mm) { + preempt_disable(); if (Hash == 0) { _tlbia(); + preempt_enable(); return; } @@ -88,6 +93,7 @@ #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif + preempt_enable(); } void @@ -97,8 +103,10 @@ pmd_t *pmd; pte_t *pte; + preempt_disable(); if (Hash == 0) { _tlbie(vmaddr); + preempt_enable(); return; } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; @@ -111,6 +119,7 @@ #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif + preempt_enable(); } @@ -127,13 +136,17 @@ unsigned long pmd_end; unsigned int ctx = mm->context; + preempt_disable(); if (Hash == 0) { _tlbia(); + preempt_enable(); return; } start &= PAGE_MASK; - if (start >= end) + if (start >= end) { + preempt_enable(); return; + } pmd = pmd_offset(pgd_offset(mm, start), start); do { pmd_end = (start + PGDIR_SIZE) & PGDIR_MASK; @@ -156,4 +169,5 @@ #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif + preempt_enable(); } diff -Nur linux-2.4.33-imedia/arch/ppc64/kernel/idle.c linux-2.4.33-imedia-patching/arch/ppc64/kernel/idle.c --- linux-2.4.33-imedia/arch/ppc64/kernel/idle.c 2004-02-18 15:36:30.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/ppc64/kernel/idle.c 2006-01-26 15:19:42.000000000 +0200 @@ -87,15 +87,12 @@ long oldval; unsigned long CTRL; - /* endless loop with no priority at all */ - current->nice = 20; - current->counter = -100; - /* ensure iSeries run light will be out when idle */ current->thread.flags &= ~PPC_FLAG_RUN_LIGHT; CTRL = mfspr(CTRLF); CTRL &= ~RUNLATCH; mtspr(CTRLT, CTRL); + /* endless loop with no priority at all */ init_idle(); lpaca = get_paca(); @@ -133,8 +130,7 @@ { long oldval; - current->nice = 20; - current->counter = -100; + /* endless loop with no priority at all */ init_idle(); for (;;) { @@ -161,8 +157,6 @@ unsigned long start_snooze; ppaca = &paca[(lpaca->xPacaIndex) ^ 1]; - current->nice = 20; - current->counter = -100; init_idle(); for (;;) { @@ -240,9 +234,6 @@ struct paca_struct *lpaca = get_paca(); /* endless loop with no priority at all */ - current->nice = 20; - current->counter = -100; - init_idle(); for (;;) { diff -Nur linux-2.4.33-imedia/arch/s390/kernel/asm-offsets.c linux-2.4.33-imedia-patching/arch/s390/kernel/asm-offsets.c --- linux-2.4.33-imedia/arch/s390/kernel/asm-offsets.c 2002-08-03 03:39:43.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/s390/kernel/asm-offsets.c 2006-01-26 15:19:42.000000000 +0200 @@ -26,7 +26,7 @@ DEFINE(__TASK_need_resched, offsetof(struct task_struct, need_resched),); DEFINE(__TASK_ptrace, offsetof(struct task_struct, ptrace),); - DEFINE(__TASK_processor, offsetof(struct task_struct, processor),); + DEFINE(__TASK_processor, offsetof(struct task_struct, cpu),); return 0; } diff -Nur linux-2.4.33-imedia/arch/s390/kernel/bitmap.S linux-2.4.33-imedia-patching/arch/s390/kernel/bitmap.S --- linux-2.4.33-imedia/arch/s390/kernel/bitmap.S 2000-05-12 21:41:44.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/s390/kernel/bitmap.S 2006-01-26 15:19:42.000000000 +0200 @@ -35,3 +35,21 @@ .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 + .globl 
_sb_findmap +_sb_findmap: + .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 diff -Nur linux-2.4.33-imedia/arch/s390/kernel/entry.S linux-2.4.33-imedia-patching/arch/s390/kernel/entry.S --- linux-2.4.33-imedia/arch/s390/kernel/entry.S 2004-11-17 13:54:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390/kernel/entry.S 2006-01-26 15:19:42.000000000 +0200 @@ -254,13 +254,14 @@ ret_from_fork: basr %r13,0 l %r13,.Lentry_base-.(%r13) # setup base pointer to &entry_base + # not saving R14 here because we go to sysc_return ultimately + l %r1,BASED(.Lschedtail) + basr %r14,%r1 # call schedule_tail (unlock stuff) GET_CURRENT # load pointer to task_struct to R9 stosm 24(%r15),0x03 # reenable interrupts sr %r0,%r0 # child returns 0 st %r0,SP_R2(%r15) # store return value (change R2 on stack) - l %r1,BASED(.Lschedtail) - la %r14,BASED(sysc_return) - br %r1 # call schedule_tail, return to sysc_return + b BASED(sysc_return) # # clone, fork, vfork, exec and sigreturn need glue, diff -Nur linux-2.4.33-imedia/arch/s390/kernel/process.c linux-2.4.33-imedia-patching/arch/s390/kernel/process.c --- linux-2.4.33-imedia/arch/s390/kernel/process.c 2004-02-18 15:36:30.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -50,15 +50,11 @@ * The idle loop on a S390... 
*/ -int cpu_idle(void *unused) +int cpu_idle(void) { psw_t wait_psw; unsigned long reg; - /* endless idle loop with no priority at all */ - init_idle(); - current->nice = 20; - current->counter = -100; while (1) { __cli(); if (current->need_resched) { @@ -96,7 +92,7 @@ { struct task_struct *tsk = current; - printk("CPU: %d %s\n", tsk->processor, print_tainted()); + printk("CPU: %d %s\n", tsk->cpu, print_tainted()); printk("Process %s (pid: %d, task: %08lx, ksp: %08x)\n", current->comm, current->pid, (unsigned long) tsk, tsk->thread.ksp); diff -Nur linux-2.4.33-imedia/arch/s390/kernel/smp.c linux-2.4.33-imedia-patching/arch/s390/kernel/smp.c --- linux-2.4.33-imedia/arch/s390/kernel/smp.c 2004-11-17 13:54:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390/kernel/smp.c 2006-01-26 15:19:42.000000000 +0200 @@ -38,7 +38,7 @@ #include /* prototypes */ -extern int cpu_idle(void * unused); +extern int cpu_idle(void); extern __u16 boot_cpu_addr; extern volatile int __cpu_logical_map[]; @@ -56,6 +56,7 @@ spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; unsigned long cpu_online_map; +unsigned long cache_decay_ticks; /* * Setup routine for controlling SMP activation @@ -512,7 +513,7 @@ { int curr_cpu; - current->processor = 0; + current->cpu = 0; smp_num_cpus = 1; cpu_online_map = 1; for (curr_cpu = 0; @@ -553,7 +554,7 @@ pfault_init(); #endif /* cpu_idle will call schedule for us */ - return cpu_idle(NULL); + return cpu_idle(); } /* @@ -591,12 +592,9 @@ idle = init_task.prev_task; if (!idle) panic("No idle process for CPU %d",cpu); - idle->processor = cpu; - idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */ + init_idle(idle, cpu); - del_from_runqueue(idle); unhash_process(idle); - init_tasks[cpu] = idle; cpu_lowcore = get_cpu_lowcore(cpu); cpu_lowcore->save_area[15] = idle->thread.ksp; @@ -648,6 +646,8 @@ panic("Couldn't request external interrupt 0x1202"); smp_count_cpus(); memset(lowcore_ptr,0,sizeof(lowcore_ptr)); + + cache_decay_ticks = (200 * HZ) / 1000; /* Is 200ms ok? Robus? XXX */ /* * Initialize the logical to physical CPU number mapping diff -Nur linux-2.4.33-imedia/arch/s390/kernel/traps.c linux-2.4.33-imedia-patching/arch/s390/kernel/traps.c --- linux-2.4.33-imedia/arch/s390/kernel/traps.c 2002-11-29 01:53:11.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390/kernel/traps.c 2006-01-26 15:19:42.000000000 +0200 @@ -142,7 +142,7 @@ * We can't print the backtrace of a running process. It is * unreliable at best and can cause kernel oopses. 
*/ - if (task_has_cpu(tsk)) + if (tsk->state == TASK_RUNNING) return; show_trace((unsigned long *) tsk->thread.ksp); } diff -Nur linux-2.4.33-imedia/arch/s390x/kernel/asm-offsets.c linux-2.4.33-imedia-patching/arch/s390x/kernel/asm-offsets.c --- linux-2.4.33-imedia/arch/s390x/kernel/asm-offsets.c 2002-08-03 03:39:43.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/s390x/kernel/asm-offsets.c 2006-01-26 15:19:42.000000000 +0200 @@ -26,7 +26,7 @@ DEFINE(__TASK_need_resched, offsetof(struct task_struct, need_resched),); DEFINE(__TASK_ptrace, offsetof(struct task_struct, ptrace),); - DEFINE(__TASK_processor, offsetof(struct task_struct, processor),); + DEFINE(__TASK_processor, offsetof(struct task_struct, cpu),); return 0; } diff -Nur linux-2.4.33-imedia/arch/s390x/kernel/bitmap.S linux-2.4.33-imedia-patching/arch/s390x/kernel/bitmap.S --- linux-2.4.33-imedia/arch/s390x/kernel/bitmap.S 2001-02-14 00:13:44.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390x/kernel/bitmap.S 2006-01-26 15:19:42.000000000 +0200 @@ -35,3 +35,21 @@ .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 + .globl _sb_findmap +_sb_findmap: + .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 diff -Nur linux-2.4.33-imedia/arch/s390x/kernel/entry.S linux-2.4.33-imedia-patching/arch/s390x/kernel/entry.S --- linux-2.4.33-imedia/arch/s390x/kernel/entry.S 2004-11-17 13:54:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390x/kernel/entry.S 2006-01-26 15:19:42.000000000 +0200 @@ -240,11 +240,11 @@ # .globl ret_from_fork ret_from_fork: + brasl %r14,schedule_tail GET_CURRENT # load pointer to task_struct to R9 stosm 48(%r15),0x03 # reenable interrupts xc SP_R2(8,%r15),SP_R2(%r15) # child returns 0 - larl %r14,sysc_return - jg schedule_tail # return to sysc_return + j sysc_return # # clone, fork, vfork, exec and sigreturn need glue, diff -Nur linux-2.4.33-imedia/arch/s390x/kernel/process.c linux-2.4.33-imedia-patching/arch/s390x/kernel/process.c --- linux-2.4.33-imedia/arch/s390x/kernel/process.c 2004-02-18 15:36:30.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390x/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -55,10 +55,6 @@ psw_t wait_psw; unsigned long reg; - /* endless idle loop with no priority at all */ - init_idle(); - current->nice = 20; - current->counter = -100; while (1) { __cli(); if (current->need_resched) { @@ -93,7 +89,7 @@ { struct task_struct *tsk = current; - printk("CPU: %d %s\n", tsk->processor, print_tainted()); + printk("CPU: %d %s\n", tsk->cpu, print_tainted()); printk("Process %s (pid: %d, task: %016lx, ksp: %016lx)\n", current->comm, current->pid, (unsigned long) tsk, tsk->thread.ksp); diff -Nur linux-2.4.33-imedia/arch/s390x/kernel/smp.c linux-2.4.33-imedia-patching/arch/s390x/kernel/smp.c --- linux-2.4.33-imedia/arch/s390x/kernel/smp.c 2004-11-17 13:54:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390x/kernel/smp.c 2006-01-26 15:19:42.000000000 +0200 @@ 
-38,7 +38,7 @@ #include /* prototypes */ -extern int cpu_idle(void * unused); +extern int cpu_idle(void); extern __u16 boot_cpu_addr; extern volatile int __cpu_logical_map[]; @@ -56,6 +56,7 @@ spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; unsigned long cpu_online_map; +unsigned long cache_decay_ticks; /* * Setup routine for controlling SMP activation @@ -494,7 +495,7 @@ { int curr_cpu; - current->processor = 0; + current->cpu = 0; smp_num_cpus = 1; cpu_online_map = 1; for (curr_cpu = 0; @@ -534,7 +535,7 @@ pfault_init(); #endif /* cpu_idle will call schedule for us */ - return cpu_idle(NULL); + return cpu_idle(); } /* @@ -572,12 +573,9 @@ idle = init_task.prev_task; if (!idle) panic("No idle process for CPU %d",cpu); - idle->processor = cpu; - idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */ + init_idle(idle, cpu); - del_from_runqueue(idle); unhash_process(idle); - init_tasks[cpu] = idle; cpu_lowcore = get_cpu_lowcore(cpu); cpu_lowcore->save_area[15] = idle->thread.ksp; @@ -631,6 +629,8 @@ smp_count_cpus(); memset(lowcore_ptr,0,sizeof(lowcore_ptr)); + cache_decay_ticks = (200 * HZ) / 1000; /* Is 200ms ok? Robus? XXX */ + /* * Initialize the logical to physical CPU number mapping */ diff -Nur linux-2.4.33-imedia/arch/s390x/kernel/traps.c linux-2.4.33-imedia-patching/arch/s390x/kernel/traps.c --- linux-2.4.33-imedia/arch/s390x/kernel/traps.c 2002-11-29 01:53:11.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/s390x/kernel/traps.c 2006-01-26 15:19:42.000000000 +0200 @@ -144,7 +144,7 @@ * We can't print the backtrace of a running process. It is * unreliable at best and can cause kernel oopses. */ - if (task_has_cpu(tsk)) + if (tsk->state == TASK_RUNNING) return; show_trace((unsigned long *) tsk->thread.ksp); } diff -Nur linux-2.4.33-imedia/arch/sh/kernel/process.c linux-2.4.33-imedia-patching/arch/sh/kernel/process.c --- linux-2.4.33-imedia/arch/sh/kernel/process.c 2003-08-25 14:44:40.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/sh/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -42,8 +42,6 @@ { /* endless idle loop with no priority at all */ init_idle(); - current->nice = 20; - current->counter = -100; while (1) { if (hlt_counter) { diff -Nur linux-2.4.33-imedia/arch/sparc/kernel/process.c linux-2.4.33-imedia-patching/arch/sparc/kernel/process.c --- linux-2.4.33-imedia/arch/sparc/kernel/process.c 2005-04-04 04:42:19.000000000 +0300 +++ linux-2.4.33-imedia-patching/arch/sparc/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -80,8 +80,6 @@ goto out; /* endless idle loop with no priority at all */ - current->nice = 20; - current->counter = -100; init_idle(); for (;;) { @@ -134,8 +132,6 @@ int cpu_idle(void) { /* endless idle loop with no priority at all */ - current->nice = 20; - current->counter = -100; init_idle(); while(1) { diff -Nur linux-2.4.33-imedia/arch/sparc64/kernel/process.c linux-2.4.33-imedia-patching/arch/sparc64/kernel/process.c --- linux-2.4.33-imedia/arch/sparc64/kernel/process.c 2006-01-11 20:29:26.000000000 +0200 +++ linux-2.4.33-imedia-patching/arch/sparc64/kernel/process.c 2006-01-26 15:19:42.000000000 +0200 @@ -54,8 +54,6 @@ return -EPERM; /* endless idle loop with no priority at all */ - current->nice = 20; - current->counter = -100; init_idle(); for (;;) { @@ -84,8 +82,6 @@ #define unidle_me() (cpu_data[current->processor].idle_volume = 0) int cpu_idle(void) { - current->nice = 20; - current->counter = -100; init_idle(); while(1) { diff -Nur linux-2.4.33-imedia/drivers/block/ll_rw_blk.c 
linux-2.4.33-imedia-patching/drivers/block/ll_rw_blk.c --- linux-2.4.33-imedia/drivers/block/ll_rw_blk.c 2004-11-17 13:54:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/drivers/block/ll_rw_blk.c 2006-01-26 15:19:42.000000000 +0200 @@ -1328,6 +1328,7 @@ kstat.pgpgin += count; break; } + conditional_schedule(); } /** diff -Nur linux-2.4.33-imedia/drivers/char/drm-4.0/tdfx_drv.c linux-2.4.33-imedia-patching/drivers/char/drm-4.0/tdfx_drv.c --- linux-2.4.33-imedia/drivers/char/drm-4.0/tdfx_drv.c 2004-02-18 15:36:31.000000000 +0200 +++ linux-2.4.33-imedia-patching/drivers/char/drm-4.0/tdfx_drv.c 2006-01-26 15:19:42.000000000 +0200 @@ -554,7 +554,6 @@ lock.context, current->pid, j, dev->lock.lock_time, jiffies); current->state = TASK_INTERRUPTIBLE; - current->policy |= SCHED_YIELD; schedule_timeout(DRM_LOCK_SLICE-j); DRM_DEBUG("jiffies=%d\n", jiffies); } diff -Nur linux-2.4.33-imedia/drivers/char/mem.c linux-2.4.33-imedia-patching/drivers/char/mem.c --- linux-2.4.33-imedia/drivers/char/mem.c 2004-08-08 02:26:04.000000000 +0300 +++ linux-2.4.33-imedia-patching/drivers/char/mem.c 2006-01-26 15:19:42.000000000 +0200 @@ -401,7 +401,7 @@ if (count > size) count = size; - zap_page_range(mm, addr, count); + zap_page_range(mm, addr, count, 0); zeromap_page_range(addr, count, PAGE_COPY); size -= count; diff -Nur linux-2.4.33-imedia/drivers/char/mwave/mwavedd.c linux-2.4.33-imedia-patching/drivers/char/mwave/mwavedd.c --- linux-2.4.33-imedia/drivers/char/mwave/mwavedd.c 2003-06-13 17:51:33.000000000 +0300 +++ linux-2.4.33-imedia-patching/drivers/char/mwave/mwavedd.c 2006-01-26 15:19:42.000000000 +0200 @@ -279,7 +279,6 @@ pDrvData->IPCs[ipcnum].bIsHere = FALSE; pDrvData->IPCs[ipcnum].bIsEnabled = TRUE; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) - current->nice = -20; /* boost to provide priority timing */ #else current->priority = 0x28; /* boost to provide priority timing */ #endif diff -Nur linux-2.4.33-imedia/drivers/char/random.c linux-2.4.33-imedia-patching/drivers/char/random.c --- linux-2.4.33-imedia/drivers/char/random.c 2005-06-01 03:56:56.000000000 +0300 +++ linux-2.4.33-imedia-patching/drivers/char/random.c 2006-01-26 15:19:42.000000000 +0200 @@ -1378,6 +1378,11 @@ buf += i; ret += i; add_timer_randomness(&extract_timer_state, nbytes); +#if LOWLATENCY_NEEDED + /* This can happen in softirq's, but that's what we want */ + if (conditional_schedule_needed()) + break; +#endif } /* Wipe data just returned from memory */ diff -Nur linux-2.4.33-imedia/drivers/char/serial_txx927.c linux-2.4.33-imedia-patching/drivers/char/serial_txx927.c --- linux-2.4.33-imedia/drivers/char/serial_txx927.c 2005-01-19 16:09:51.000000000 +0200 +++ linux-2.4.33-imedia-patching/drivers/char/serial_txx927.c 2006-01-26 15:19:42.000000000 +0200 @@ -1526,7 +1526,6 @@ printk("cisr = %d (jiff=%lu)...", cisr, jiffies); #endif current->state = TASK_INTERRUPTIBLE; - current->counter = 0; /* make us low-priority */ schedule_timeout(char_time); if (signal_pending(current)) break; diff -Nur linux-2.4.33-imedia/drivers/i2c/i2c-algo-bit.c linux-2.4.33-imedia-patching/drivers/i2c/i2c-algo-bit.c --- linux-2.4.33-imedia/drivers/i2c/i2c-algo-bit.c 2005-04-04 04:42:19.000000000 +0300 +++ linux-2.4.33-imedia-patching/drivers/i2c/i2c-algo-bit.c 2006-01-26 15:19:42.000000000 +0200 @@ -367,6 +367,7 @@ return (retval<0)? retval : -EFAULT; /* got a better one ?? 
*/ } + conditional_schedule(); #if 0 /* from asm/delay.h */ __delay(adap->mdelay * (loops_per_sec / 1000) ); diff -Nur linux-2.4.33-imedia/drivers/i2c/i2c-core.c linux-2.4.33-imedia-patching/drivers/i2c/i2c-core.c --- linux-2.4.33-imedia/drivers/i2c/i2c-core.c 2005-06-01 03:56:56.000000000 +0300 +++ linux-2.4.33-imedia-patching/drivers/i2c/i2c-core.c 2006-01-26 15:19:42.000000000 +0200 @@ -724,6 +724,8 @@ { int ret; + conditional_schedule(); + if (adap->algo->master_xfer) { DEB2(printk(KERN_DEBUG "i2c-core.o: master_xfer: %s with %d msgs.\n", adap->name,num)); @@ -746,6 +748,8 @@ struct i2c_adapter *adap=client->adapter; struct i2c_msg msg; + conditional_schedule(); + if (client->adapter->algo->master_xfer) { msg.addr = client->addr; msg.flags = client->flags & I2C_M_TEN; @@ -775,6 +779,9 @@ struct i2c_adapter *adap=client->adapter; struct i2c_msg msg; int ret; + + conditional_schedule(); + if (client->adapter->algo->master_xfer) { msg.addr = client->addr; msg.flags = client->flags & I2C_M_TEN; diff -Nur linux-2.4.33-imedia/drivers/ieee1394/csr.c linux-2.4.33-imedia-patching/drivers/ieee1394/csr.c --- linux-2.4.33-imedia/drivers/ieee1394/csr.c 2004-02-18 15:36:31.000000000 +0200 +++ linux-2.4.33-imedia-patching/drivers/ieee1394/csr.c 2006-01-26 15:19:42.000000000 +0200 @@ -18,6 +18,7 @@ */ #include +#include #include /* needed for MODULE_PARM */ #include #include diff -Nur linux-2.4.33-imedia/drivers/md/md.c linux-2.4.33-imedia-patching/drivers/md/md.c --- linux-2.4.33-imedia/drivers/md/md.c 2005-11-16 21:12:54.000000000 +0200 +++ linux-2.4.33-imedia-patching/drivers/md/md.c 2006-01-26 15:19:42.000000000 +0200 @@ -2955,8 +2955,6 @@ * bdflush, otherwise bdflush will deadlock if there are too * many dirty RAID5 blocks. */ - current->policy = SCHED_OTHER; - current->nice = -20; md_unlock_kernel(); complete(thread->event); @@ -3480,11 +3478,6 @@ "(but not more than %d KB/sec) for reconstruction.\n", sysctl_speed_limit_max); - /* - * Resync has low priority. 
- */ - current->nice = 19; - is_mddev_idle(mddev); /* this also initializes IO event counters */ for (m = 0; m < SYNC_MARKS; m++) { mark[m] = jiffies; @@ -3562,16 +3555,13 @@ currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1; if (currspeed > sysctl_speed_limit_min) { - current->nice = 19; - if ((currspeed > sysctl_speed_limit_max) || !is_mddev_idle(mddev)) { current->state = TASK_INTERRUPTIBLE; md_schedule_timeout(HZ/4); goto repeat; } - } else - current->nice = -20; + } } printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev)); err = 0; diff -Nur linux-2.4.33-imedia/drivers/sound/sound_core.c linux-2.4.33-imedia-patching/drivers/sound/sound_core.c --- linux-2.4.33-imedia/drivers/sound/sound_core.c 2001-09-30 22:26:08.000000000 +0300 +++ linux-2.4.33-imedia-patching/drivers/sound/sound_core.c 2006-01-26 15:19:42.000000000 +0200 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff -Nur linux-2.4.33-imedia/drivers/video/fbcon-cfb16.c linux-2.4.33-imedia-patching/drivers/video/fbcon-cfb16.c --- linux-2.4.33-imedia/drivers/video/fbcon-cfb16.c 2001-10-15 23:47:13.000000000 +0300 +++ linux-2.4.33-imedia-patching/drivers/video/fbcon-cfb16.c 2006-01-26 15:19:42.000000000 +0200 @@ -189,6 +189,7 @@ case 4: case 8: while (count--) { + conditional_schedule(); c = scr_readw(s++) & p->charmask; cdat = p->fontdata + c * fontheight(p); for (rows = fontheight(p), dest = dest0; rows--; dest += bytes) { @@ -206,6 +207,7 @@ case 12: case 16: while (count--) { + conditional_schedule(); c = scr_readw(s++) & p->charmask; cdat = p->fontdata + (c * fontheight(p) << 1); for (rows = fontheight(p), dest = dest0; rows--; dest += bytes) { diff -Nur linux-2.4.33-imedia/fs/adfs/map.c linux-2.4.33-imedia-patching/fs/adfs/map.c --- linux-2.4.33-imedia/fs/adfs/map.c 2001-10-25 23:53:53.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/adfs/map.c 2006-01-26 15:19:42.000000000 +0200 @@ -12,6 +12,7 @@ #include #include #include +#include #include "adfs.h" diff -Nur linux-2.4.33-imedia/fs/binfmt_elf.c linux-2.4.33-imedia-patching/fs/binfmt_elf.c --- linux-2.4.33-imedia/fs/binfmt_elf.c 2006-01-11 20:29:27.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/binfmt_elf.c 2006-01-26 15:19:42.000000000 +0200 @@ -1267,7 +1267,7 @@ psinfo.pr_state = i; psinfo.pr_sname = (i < 0 || i > 5) ? '.' 
: "RSDZTD"[i]; psinfo.pr_zomb = psinfo.pr_sname == 'Z'; - psinfo.pr_nice = current->nice; + psinfo.pr_nice = task_nice(current); psinfo.pr_flag = current->flags; psinfo.pr_uid = NEW_TO_OLD_UID(current->uid); psinfo.pr_gid = NEW_TO_OLD_GID(current->gid); diff -Nur linux-2.4.33-imedia/fs/buffer.c linux-2.4.33-imedia-patching/fs/buffer.c --- linux-2.4.33-imedia/fs/buffer.c 2005-11-16 21:12:54.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/buffer.c 2006-01-26 15:19:42.000000000 +0200 @@ -124,7 +124,7 @@ int dummy5; /* unused */ } b_un; unsigned int data[N_PARAM]; -} bdf_prm = {{30, 500, 0, 0, 5*HZ, 30*HZ, 60, 20, 0}}; +} bdf_prm = {{50, 500, 0, 0, 5*HZ, 30*HZ, 60, 20, 0}}; /* These are the min and max parameter values that we will allow to be assigned */ int bdflush_min[N_PARAM] = { 0, 1, 0, 0, 0, 1*HZ, 0, 0, 0}; @@ -261,8 +261,10 @@ if (dev != NODEV && bh->b_dev != dev) continue; - if (test_and_set_bit(BH_Lock, &bh->b_state)) + if (test_and_set_bit(BH_Lock, &bh->b_state)) { + __refile_buffer(bh); continue; + } if (buffer_delay(bh)) { if (write_buffer_delay(bh)) { if (count) @@ -278,6 +280,7 @@ spin_unlock(&lru_list_lock); write_locked_buffers(array, count); + conditional_schedule(); return -EAGAIN; } unlock_buffer(bh); @@ -311,12 +314,19 @@ struct buffer_head * next; int nr; - next = lru_list[index]; nr = nr_buffers_type[index]; +repeat: + next = lru_list[index]; while (next && --nr >= 0) { struct buffer_head *bh = next; next = bh->b_next_free; + if (conditional_schedule_needed()) { + spin_unlock(&lru_list_lock); + unconditional_schedule(); + spin_lock(&lru_list_lock); + goto repeat; + } if (!buffer_locked(bh)) { if (refile) __refile_buffer(bh); @@ -324,7 +334,6 @@ } if (dev != NODEV && bh->b_dev != dev) continue; - get_bh(bh); spin_unlock(&lru_list_lock); wait_on_buffer (bh); @@ -357,6 +366,15 @@ { int err = 0; +#if LOWLATENCY_NEEDED + /* + * syncing devA when there are lots of buffers dirty against + * devB is expensive. + */ + if (enable_lowlatency) + dev = NODEV; +#endif + /* One pass for no-wait, three for wait: * 0) write out all dirty, unlocked buffers; * 1) wait for all dirty locked buffers; @@ -724,6 +742,7 @@ int i, nlist, slept; struct buffer_head * bh, * bh_next; kdev_t dev = to_kdev_t(bdev->bd_dev); /* will become bdev */ + int lolat_retry = 0; retry: slept = 0; @@ -741,6 +760,17 @@ /* Not hashed? 
*/ if (!bh->b_pprev) continue; + + if (lolat_retry < 10 && conditional_schedule_needed()) { + get_bh(bh); + spin_unlock(&lru_list_lock); + unconditional_schedule(); + spin_lock(&lru_list_lock); + put_bh(bh); + slept = 1; + lolat_retry++; + } + if (buffer_locked(bh)) { get_bh(bh); spin_unlock(&lru_list_lock); @@ -892,12 +922,18 @@ struct buffer_head *bh; struct list_head tmp; int err = 0, err2; - + DEFINE_RESCHED_COUNT; + INIT_LIST_HEAD(&tmp); - +repeat: spin_lock(&lru_list_lock); while (!list_empty(list)) { + if (conditional_schedule_needed()) { + spin_unlock(&lru_list_lock); + unconditional_schedule(); + goto repeat; + } bh = BH_ENTRY(list->next); list_del(&bh->b_inode_buffers); if (!buffer_dirty(bh) && !buffer_locked(bh)) @@ -922,8 +958,18 @@ spin_lock(&lru_list_lock); } } + if (TEST_RESCHED_COUNT(32)) { + RESET_RESCHED_COUNT(); + if (conditional_schedule_needed()) { + spin_unlock(&lru_list_lock); + unconditional_schedule(); + spin_lock(&lru_list_lock); + } + } } + RESET_RESCHED_COUNT(); + while (!list_empty(&tmp)) { bh = BH_ENTRY(tmp.prev); remove_inode_queue(bh); @@ -933,6 +979,7 @@ if (!buffer_uptodate(bh)) err = -EIO; brelse(bh); + conditional_schedule(); spin_lock(&lru_list_lock); } @@ -960,11 +1007,20 @@ struct buffer_head *bh; struct list_head *p; int err = 0; + DEFINE_RESCHED_COUNT; +repeat: + conditional_schedule(); spin_lock(&lru_list_lock); - repeat: list_for_each_prev(p, list) { + if (TEST_RESCHED_COUNT(32)) { + RESET_RESCHED_COUNT(); + if (conditional_schedule_needed()) { + spin_unlock(&lru_list_lock); + goto repeat; + } + } bh = BH_ENTRY(p); if (buffer_locked(bh)) { get_bh(bh); @@ -973,7 +1029,6 @@ if (!buffer_uptodate(bh)) err = -EIO; brelse(bh); - spin_lock(&lru_list_lock); goto repeat; } } @@ -990,12 +1045,24 @@ void invalidate_inode_buffers(struct inode *inode) { struct list_head * entry; - + +repeat: + conditional_schedule(); spin_lock(&lru_list_lock); - while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers) + while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers) { + if (conditional_schedule_needed()) { + spin_unlock(&lru_list_lock); + goto repeat; + } remove_inode_queue(BH_ENTRY(entry)); - while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers) + } + while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers) { + if (conditional_schedule_needed()) { + spin_unlock(&lru_list_lock); + goto repeat; + } remove_inode_queue(BH_ENTRY(entry)); + } spin_unlock(&lru_list_lock); } @@ -1018,6 +1085,7 @@ bh = get_hash_table(dev, block, size); if (bh) { touch_buffer(bh); + conditional_schedule(); return bh; } diff -Nur linux-2.4.33-imedia/fs/dcache.c linux-2.4.33-imedia-patching/fs/dcache.c --- linux-2.4.33-imedia/fs/dcache.c 2006-01-11 20:29:27.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/dcache.c 2006-01-26 15:19:42.000000000 +0200 @@ -324,11 +324,23 @@ void prune_dcache(int count) { + DEFINE_RESCHED_COUNT; + +redo: spin_lock(&dcache_lock); for (;;) { struct dentry *dentry; struct list_head *tmp; + if (TEST_RESCHED_COUNT(100)) { + RESET_RESCHED_COUNT(); + if (conditional_schedule_needed()) { + spin_unlock(&dcache_lock); + unconditional_schedule(); + goto redo; + } + } + tmp = dentry_unused.prev; if (tmp == &dentry_unused) @@ -483,6 +495,7 @@ struct dentry *this_parent = parent; struct list_head *next; int found = 0; + DEFINE_RESCHED_COUNT; spin_lock(&dcache_lock); repeat: @@ -497,6 +510,13 @@ list_add(&dentry->d_lru, dentry_unused.prev); found++; } + + if (TEST_RESCHED_COUNT(500) && found 
> 10) { + if (conditional_schedule_needed()) /* Typically sys_rmdir() */ + goto out; + RESET_RESCHED_COUNT(); + } + /* * Descend a level if the d_subdirs list is non-empty. */ @@ -521,6 +541,7 @@ #endif goto resume; } +out: spin_unlock(&dcache_lock); return found; } @@ -536,8 +557,10 @@ { int found; - while ((found = select_parent(parent)) != 0) + while ((found = select_parent(parent)) != 0) { prune_dcache(found); + conditional_schedule(); /* Typically sys_rmdir() */ + } } /* @@ -569,7 +592,7 @@ if (!(gfp_mask & __GFP_FS)) return 0; - count = dentry_stat.nr_unused / priority; + count = dentry_stat.nr_unused * priority / 100; prune_dcache(count); return kmem_cache_shrink(dentry_cache); diff -Nur linux-2.4.33-imedia/fs/exec.c linux-2.4.33-imedia-patching/fs/exec.c --- linux-2.4.33-imedia/fs/exec.c 2005-01-19 16:10:10.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/exec.c 2006-01-26 15:19:42.000000000 +0200 @@ -245,7 +245,7 @@ memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len); } - err = copy_from_user(kaddr+offset, str, bytes_to_copy); + err = ll_copy_from_user(kaddr+offset, str, bytes_to_copy); if (err) { ret = -EFAULT; goto out; @@ -459,8 +459,8 @@ active_mm = current->active_mm; current->mm = mm; current->active_mm = mm; - task_unlock(current); activate_mm(active_mm, mm); + task_unlock(current); mm_release(); if (old_mm) { if (active_mm != old_mm) BUG(); diff -Nur linux-2.4.33-imedia/fs/ext2/dir.c linux-2.4.33-imedia-patching/fs/ext2/dir.c --- linux-2.4.33-imedia/fs/ext2/dir.c 2005-04-04 04:42:20.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/ext2/dir.c 2006-01-26 15:19:42.000000000 +0200 @@ -153,6 +153,7 @@ struct address_space *mapping = dir->i_mapping; struct page *page = read_cache_page(mapping, n, (filler_t*)mapping->a_ops->readpage, NULL); + conditional_schedule(); /* Scanning large directories */ if (!IS_ERR(page)) { wait_on_page(page); kmap(page); diff -Nur linux-2.4.33-imedia/fs/ext2/inode.c linux-2.4.33-imedia-patching/fs/ext2/inode.c --- linux-2.4.33-imedia/fs/ext2/inode.c 2004-08-08 02:26:05.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/ext2/inode.c 2006-01-26 15:19:42.000000000 +0200 @@ -727,8 +727,13 @@ { unsigned long block_to_free = 0, count = 0; unsigned long nr; + DEFINE_RESCHED_COUNT; for ( ; p < q ; p++) { + if (TEST_RESCHED_COUNT(32)) { + RESET_RESCHED_COUNT(); + conditional_schedule(); + } nr = le32_to_cpu(*p); if (nr) { *p = 0; @@ -771,6 +776,7 @@ if (depth--) { int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); for ( ; p < q ; p++) { + conditional_schedule(); /* Deleting large files */ nr = le32_to_cpu(*p); if (!nr) continue; diff -Nur linux-2.4.33-imedia/fs/ext3/balloc.c linux-2.4.33-imedia-patching/fs/ext3/balloc.c --- linux-2.4.33-imedia/fs/ext3/balloc.c 2003-06-13 17:51:37.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/ext3/balloc.c 2006-01-26 15:19:42.000000000 +0200 @@ -363,6 +363,9 @@ } } #endif + /* superblock lock is held, so this is safe */ + conditional_schedule(); + BUFFER_TRACE(bitmap_bh, "clear bit"); if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) { ext3_error(sb, __FUNCTION__, diff -Nur linux-2.4.33-imedia/fs/ext3/inode.c linux-2.4.33-imedia-patching/fs/ext3/inode.c --- linux-2.4.33-imedia/fs/ext3/inode.c 2004-08-08 02:26:05.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/ext3/inode.c 2006-01-26 15:19:42.000000000 +0200 @@ -930,6 +930,8 @@ prev_blocks = inode->i_blocks; + conditional_schedule(); /* Reading large directories */ + bh = ext3_getblk (handle, inode, block, create, err); if (!bh) return bh; @@ -1633,6 +1635,7 
@@ */ for (p = first; p < last; p++) { u32 nr = le32_to_cpu(*p); + conditional_schedule(); if (nr) { struct buffer_head *bh; @@ -1687,6 +1690,7 @@ } for (p = first; p < last; p++) { + conditional_schedule(); nr = le32_to_cpu(*p); if (nr) { /* accumulate blocks to free if they're contiguous */ diff -Nur linux-2.4.33-imedia/fs/ext3/namei.c linux-2.4.33-imedia-patching/fs/ext3/namei.c --- linux-2.4.33-imedia/fs/ext3/namei.c 2003-06-13 17:51:37.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/ext3/namei.c 2006-01-26 15:19:42.000000000 +0200 @@ -157,6 +157,7 @@ if ((bh = bh_use[ra_ptr++]) == NULL) goto next; wait_on_buffer(bh); + conditional_schedule(); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ brelse(bh); diff -Nur linux-2.4.33-imedia/fs/fat/cache.c linux-2.4.33-imedia-patching/fs/fat/cache.c --- linux-2.4.33-imedia/fs/fat/cache.c 2001-10-12 23:48:42.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/fat/cache.c 2006-01-26 15:19:42.000000000 +0200 @@ -14,6 +14,7 @@ #include #include #include +#include #if 0 # define PRINTK(x) printk x diff -Nur linux-2.4.33-imedia/fs/inode.c linux-2.4.33-imedia-patching/fs/inode.c --- linux-2.4.33-imedia/fs/inode.c 2005-11-16 21:12:54.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/inode.c 2006-01-26 15:19:42.000000000 +0200 @@ -348,6 +348,8 @@ filemap_fdatawait(inode->i_mapping); + conditional_schedule(); + spin_lock(&inode_lock); inode->i_state &= ~I_LOCK; __refile_inode(inode); @@ -650,6 +652,7 @@ while (!list_empty(head)) { struct inode *inode; + conditional_schedule(); inode = list_entry(head->next, struct inode, i_list); list_del(&inode->i_list); @@ -686,9 +689,22 @@ if (tmp == head) break; inode = list_entry(tmp, struct inode, i_list); + + if (conditional_schedule_needed()) { + atomic_inc(&inode->i_count); + spin_unlock(&inode_lock); + unconditional_schedule(); + spin_lock(&inode_lock); + atomic_dec(&inode->i_count); + } + if (inode->i_sb != sb) continue; + atomic_inc(&inode->i_count); + spin_unlock(&inode_lock); invalidate_inode_buffers(inode); + spin_lock(&inode_lock); + atomic_dec(&inode->i_count); if (!atomic_read(&inode->i_count)) { list_del_init(&inode->i_hash); list_del(&inode->i_list); @@ -798,15 +814,28 @@ int avg_pages; #endif struct inode * inode; + int nr_to_scan = inodes_stat.nr_unused; +resume: spin_lock(&inode_lock); - count = 0; entry = inode_unused.prev; - while (entry != &inode_unused) - { + while (entry != &inode_unused && nr_to_scan--) { struct list_head *tmp = entry; + if (conditional_schedule_needed()) { + /* + * Need to drop the lock. Reposition + * the list head so we start here next time. + * This can corrupt the LRU nature of the + * unused list, but this isn't very important. + */ + list_del(&inode_unused); + list_add(&inode_unused, entry); + spin_unlock(&inode_lock); + unconditional_schedule(); + goto resume; + } entry = entry->prev; inode = INODE(tmp); if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK)) @@ -914,7 +943,7 @@ if (!(gfp_mask & __GFP_FS)) return 0; - count = inodes_stat.nr_unused / priority; + count = inodes_stat.nr_unused * priority / 100; prune_icache(count); return kmem_cache_shrink(inode_cachep); @@ -1008,6 +1037,8 @@ if (inode) { struct inode * old; + conditional_schedule(); /* sync_old_buffers */ + spin_lock(&inode_lock); /* We released the lock, so.. 
old = find_inode(sb, ino, head, find_actor, opaque); diff -Nur linux-2.4.33-imedia/fs/jbd/checkpoint.c linux-2.4.33-imedia-patching/fs/jbd/checkpoint.c --- linux-2.4.33-imedia/fs/jbd/checkpoint.c 2002-11-29 01:53:15.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/jbd/checkpoint.c 2006-01-26 15:19:42.000000000 +0200 @@ -431,7 +431,11 @@ { transaction_t *transaction, *last_transaction, *next_transaction; int ret = 0; + int ll_retries = 4; /* lowlatency addition */ +restart: + if (ll_retries-- == 0) + goto out; transaction = journal->j_checkpoint_transactions; if (transaction == 0) goto out; @@ -451,6 +455,12 @@ jh = next_jh; next_jh = jh->b_cpnext; ret += __try_to_free_cp_buf(jh); + if (conditional_schedule_needed()) { + spin_unlock(&journal_datalist_lock); + unconditional_schedule(); + spin_lock(&journal_datalist_lock); + goto restart; + } } while (jh != last_jh); } } while (transaction != last_transaction); diff -Nur linux-2.4.33-imedia/fs/jbd/commit.c linux-2.4.33-imedia-patching/fs/jbd/commit.c --- linux-2.4.33-imedia/fs/jbd/commit.c 2004-02-18 15:36:31.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/jbd/commit.c 2006-01-26 15:19:42.000000000 +0200 @@ -257,6 +257,16 @@ __journal_remove_journal_head(bh); refile_buffer(bh); release_buffer_page(bh); + if (conditional_schedule_needed()) { + if (commit_transaction->t_sync_datalist) + commit_transaction->t_sync_datalist = + next_jh; + if (bufs) + break; + spin_unlock(&journal_datalist_lock); + unconditional_schedule(); + goto write_out_data; + } } } if (bufs == ARRAY_SIZE(wbuf)) { @@ -280,8 +290,7 @@ journal_brelse_array(wbuf, bufs); lock_journal(journal); spin_lock(&journal_datalist_lock); - if (bufs) - goto write_out_data_locked; + goto write_out_data_locked; } /* @@ -317,6 +326,15 @@ */ while ((jh = commit_transaction->t_async_datalist)) { struct buffer_head *bh = jh2bh(jh); + + if (conditional_schedule_needed()) { + spin_unlock(&journal_datalist_lock); + unlock_journal(journal); + unconditional_schedule(); + lock_journal(journal); + spin_lock(&journal_datalist_lock); + continue; /* List may have changed */ + } if (__buffer_state(bh, Freed)) { BUFFER_TRACE(bh, "Cleaning freed buffer"); clear_bit(BH_Freed, &bh->b_state); @@ -536,6 +554,8 @@ wait_for_iobuf: while (commit_transaction->t_iobuf_list != NULL) { struct buffer_head *bh; + + conditional_schedule(); jh = commit_transaction->t_iobuf_list->b_tprev; bh = jh2bh(jh); if (buffer_locked(bh)) { @@ -695,6 +715,8 @@ struct buffer_head *bh; int was_freed = 0; + conditional_schedule(); /* journal is locked */ + jh = commit_transaction->t_forget; J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || jh->b_transaction == journal->j_running_transaction); diff -Nur linux-2.4.33-imedia/fs/jffs2/background.c linux-2.4.33-imedia-patching/fs/jffs2/background.c --- linux-2.4.33-imedia/fs/jffs2/background.c 2001-10-25 10:07:09.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/jffs2/background.c 2006-01-26 15:19:42.000000000 +0200 @@ -106,9 +106,6 @@ sprintf(current->comm, "jffs2_gcd_mtd%d", c->mtd->index); - /* FIXME in the 2.2 backport */ - current->nice = 10; - for (;;) { spin_lock_irq(&current->sigmask_lock); siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT)); diff -Nur linux-2.4.33-imedia/fs/nfsd/nfssvc.c linux-2.4.33-imedia-patching/fs/nfsd/nfssvc.c --- linux-2.4.33-imedia/fs/nfsd/nfssvc.c 2002-11-29 01:53:15.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/nfsd/nfssvc.c 2006-01-26 15:19:42.000000000 +0200 @@ -250,6 +250,7 @@
svc_exit_thread(rqstp); /* Release module */ + unlock_kernel(); MOD_DEC_USE_COUNT; } diff -Nur linux-2.4.33-imedia/fs/nls/nls_base.c linux-2.4.33-imedia-patching/fs/nls/nls_base.c --- linux-2.4.33-imedia/fs/nls/nls_base.c 2002-08-03 03:39:45.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/nls/nls_base.c 2006-01-26 15:19:42.000000000 +0200 @@ -18,6 +18,7 @@ #ifdef CONFIG_KMOD #include #endif +#include #include static struct nls_table *tables; diff -Nur linux-2.4.33-imedia/fs/proc/array.c linux-2.4.33-imedia-patching/fs/proc/array.c --- linux-2.4.33-imedia/fs/proc/array.c 2005-01-19 16:10:11.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/proc/array.c 2006-01-26 15:19:42.000000000 +0200 @@ -345,9 +345,8 @@ /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ - priority = task->counter; - priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER; - nice = task->nice; + priority = task_prio(task); + nice = task_nice(task); read_lock(&tasklist_lock); ppid = task->pid ? task->p_opptr->pid : 0; @@ -397,7 +396,7 @@ task->nswap, task->cnswap, task->exit_signal, - task->processor); + task->cpu); if(mm) mmput(mm); return res; @@ -422,9 +421,11 @@ if (end > PMD_SIZE) end = PMD_SIZE; do { - pte_t page = *pte; + pte_t page; struct page *ptpage; + conditional_schedule(); /* For `top' and `ps' */ + page = *pte; address += PAGE_SIZE; pte++; if (pte_none(page)) diff -Nur linux-2.4.33-imedia/fs/proc/generic.c linux-2.4.33-imedia-patching/fs/proc/generic.c --- linux-2.4.33-imedia/fs/proc/generic.c 2005-01-19 16:10:11.000000000 +0200 +++ linux-2.4.33-imedia-patching/fs/proc/generic.c 2006-01-26 15:19:42.000000000 +0200 @@ -101,6 +101,8 @@ retval = n; break; } + + conditional_schedule(); /* Some /proc files are large */ /* This is a hack to allow mangling of file pos independent * of actual bytes read. Simply place the data at page, diff -Nur linux-2.4.33-imedia/fs/proc/proc_misc.c linux-2.4.33-imedia-patching/fs/proc/proc_misc.c --- linux-2.4.33-imedia/fs/proc/proc_misc.c 2004-08-08 02:26:06.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/proc/proc_misc.c 2006-01-26 15:19:42.000000000 +0200 @@ -109,11 +109,11 @@ a = avenrun[0] + (FIXED_1/200); b = avenrun[1] + (FIXED_1/200); c = avenrun[2] + (FIXED_1/200); - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n", + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running, nr_threads, last_pid); + nr_running(), nr_threads, last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -125,7 +125,7 @@ int len; uptime = jiffies; - idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime; + idle = init_task.times.tms_utime + init_task.times.tms_stime; /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but that would overflow about every five days at HZ == 100. 
@@ -374,10 +374,10 @@ } proc_sprintf(page, &off, &len, - "\nctxt %u\n" + "\nctxt %lu\n" "btime %lu\n" "processes %lu\n", - kstat.context_swtch, + nr_context_switches(), xtime.tv_sec - jif / HZ, total_forks); diff -Nur linux-2.4.33-imedia/fs/reiserfs/buffer2.c linux-2.4.33-imedia-patching/fs/reiserfs/buffer2.c --- linux-2.4.33-imedia/fs/reiserfs/buffer2.c 2003-08-25 14:44:43.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/reiserfs/buffer2.c 2006-01-26 15:19:42.000000000 +0200 @@ -40,6 +40,8 @@ } } +extern unsigned long nr_context_switches(void); + /* * reiserfs_bread() reads a specified block and returns the buffer that contains * it. It returns NULL if the block was unreadable. @@ -51,11 +53,12 @@ struct buffer_head * reiserfs_bread (struct super_block *super, int n_block, int n_size) { struct buffer_head *result; - PROC_EXP( unsigned int ctx_switches = kstat.context_swtch ); + PROC_EXP( unsigned int ctx_switches = nr_context_switches() ); result = bread (super -> s_dev, n_block, n_size); + conditional_schedule(); PROC_INFO_INC( super, breads ); - PROC_EXP( if( kstat.context_swtch != ctx_switches ) + PROC_EXP( if( nr_context_switches() != ctx_switches ) PROC_INFO_INC( super, bread_miss ) ); return result; } diff -Nur linux-2.4.33-imedia/fs/reiserfs/journal.c linux-2.4.33-imedia-patching/fs/reiserfs/journal.c --- linux-2.4.33-imedia/fs/reiserfs/journal.c 2004-08-08 02:26:06.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/reiserfs/journal.c 2006-01-26 15:19:42.000000000 +0200 @@ -577,6 +577,7 @@ /* lock the current transaction */ inline static void lock_journal(struct super_block *p_s_sb) { PROC_INFO_INC( p_s_sb, journal.lock_journal ); + conditional_schedule(); while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) { PROC_INFO_INC( p_s_sb, journal.lock_journal_wait ); sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ; @@ -707,6 +708,7 @@ mark_buffer_dirty(tbh) ; } ll_rw_block(WRITE, 1, &tbh) ; + conditional_schedule(); count++ ; put_bh(tbh) ; /* once for our get_hash */ } @@ -836,6 +838,7 @@ set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ; ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ; wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; + conditional_schedule(); if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { reiserfs_warning( p_s_sb, "reiserfs: journal-837: IO error during journal replay\n" ); return -EIO ; @@ -2363,6 +2366,7 @@ } int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { + conditional_schedule(); return do_journal_begin_r(th, p_s_sb, nblocks, 0) ; } @@ -2503,6 +2507,7 @@ } int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + conditional_schedule(); return do_journal_end(th, p_s_sb, nblocks, 0) ; } @@ -2974,6 +2979,7 @@ RFALSE( buffer_locked(bh) && cur_tb != NULL, "waiting while do_balance was running\n") ; wait_on_buffer(bh) ; + conditional_schedule(); } PROC_INFO_INC( p_s_sb, journal.prepare_retry ); retry_count++ ; @@ -3148,6 +3154,7 @@ /* copy all the real blocks into log area. 
dirty log blocks */ if (test_bit(BH_JDirty, &cn->bh->b_state)) { struct buffer_head *tmp_bh ; + conditional_schedule(); tmp_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; mark_buffer_uptodate(tmp_bh, 1) ; diff -Nur linux-2.4.33-imedia/fs/reiserfs/stree.c linux-2.4.33-imedia-patching/fs/reiserfs/stree.c --- linux-2.4.33-imedia/fs/reiserfs/stree.c 2003-08-25 14:44:43.000000000 +0300 +++ linux-2.4.33-imedia-patching/fs/reiserfs/stree.c 2006-01-26 15:19:42.000000000 +0200 @@ -652,9 +652,8 @@ stop at leaf level - set to DISK_LEAF_NODE_LEVEL */ ) { - int n_block_number = SB_ROOT_BLOCK (p_s_sb), - expected_level = SB_TREE_HEIGHT (p_s_sb), - n_block_size = p_s_sb->s_blocksize; + int n_block_number, expected_level; + int n_block_size = p_s_sb->s_blocksize; struct buffer_head * p_s_bh; struct path_element * p_s_last_element; int n_node_level, n_retval; @@ -666,7 +665,8 @@ #endif PROC_INFO_INC( p_s_sb, search_by_key ); - + conditional_schedule(); + /* As we add each node to a path we increase its count. This means that we must be careful to release all nodes in a path before we either discard the path struct or re-use the path struct, as we do here. */ @@ -678,6 +678,8 @@ /* With each iteration of this loop we search through the items in the current node, and calculate the next current node(next path element) for the next iteration of this loop.. */ + n_block_number = SB_ROOT_BLOCK (p_s_sb); + expected_level = SB_TREE_HEIGHT (p_s_sb); while ( 1 ) { #ifdef CONFIG_REISERFS_CHECK @@ -1104,6 +1106,8 @@ for (n_counter = *p_n_removed; n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) { + conditional_schedule(); + if (item_moved (&s_ih, p_s_path)) { need_research = 1 ; break; diff -Nur linux-2.4.33-imedia/include/asm-alpha/bitops.h linux-2.4.33-imedia-patching/include/asm-alpha/bitops.h --- linux-2.4.33-imedia/include/asm-alpha/bitops.h 2001-10-13 01:35:54.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-alpha/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -3,6 +3,7 @@ #include #include +#include /* * Copyright 1994, Linus Torvalds. @@ -60,25 +61,25 @@ __asm__ __volatile__( "1: ldl_l %0,%3\n" - " and %0,%2,%0\n" + " bic %0,%2,%0\n" " stl_c %0,%1\n" " beq %0,2f\n" ".subsection 2\n" "2: br 1b\n" ".previous" :"=&r" (temp), "=m" (*m) - :"Ir" (~(1UL << (nr & 31))), "m" (*m)); + :"Ir" (1UL << (nr & 31)), "m" (*m)); } /* * WARNING: non atomic version. */ static __inline__ void -__change_bit(unsigned long nr, volatile void * addr) +__clear_bit(unsigned long nr, volatile void * addr) { int *m = ((int *) addr) + (nr >> 5); - *m ^= 1 << (nr & 31); + *m &= ~(1 << (nr & 31)); } static inline void @@ -99,6 +100,17 @@ :"Ir" (1UL << (nr & 31)), "m" (*m)); } +/* + * WARNING: non atomic version. + */ +static __inline__ void +__change_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m ^= 1 << (nr & 31); +} + static inline int test_and_set_bit(unsigned long nr, volatile void *addr) { @@ -181,20 +193,6 @@ return (old & mask) != 0; } -/* - * WARNING: non atomic version. - */ -static __inline__ int -__test_and_change_bit(unsigned long nr, volatile void * addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - int *m = ((int *) addr) + (nr >> 5); - int old = *m; - - *m = old ^ mask; - return (old & mask) != 0; -} - static inline int test_and_change_bit(unsigned long nr, volatile void * addr) { @@ -220,6 +218,20 @@ return oldbit != 0; } +/* + * WARNING: non atomic version. 
+ */ +static __inline__ int +__test_and_change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old ^ mask; + return (old & mask) != 0; +} + static inline int test_bit(int nr, volatile void * addr) { @@ -235,12 +247,15 @@ */ static inline unsigned long ffz_b(unsigned long x) { - unsigned long sum = 0; + unsigned long sum, x1, x2, x4; x = ~x & -~x; /* set first 0 bit, clear others */ - if (x & 0xF0) sum += 4; - if (x & 0xCC) sum += 2; - if (x & 0xAA) sum += 1; + x1 = x & 0xAA; + x2 = x & 0xCC; + x4 = x & 0xF0; + sum = x2 ? 2 : 0; + sum += (x4 != 0) * 4; + sum += (x1 != 0); return sum; } @@ -257,24 +272,46 @@ __asm__("cmpbge %1,%2,%0" : "=r"(bits) : "r"(word), "r"(~0UL)); qofs = ffz_b(bits); - __asm__("extbl %1,%2,%0" : "=r"(bits) : "r"(word), "r"(qofs)); + bits = __kernel_extbl(word, qofs); bofs = ffz_b(bits); return qofs*8 + bofs; #endif } +/* + * __ffs = Find First set bit in word. Undefined if no set bit exists. + */ +static inline unsigned long __ffs(unsigned long word) +{ +#if defined(__alpha_cix__) && defined(__alpha_fix__) + /* Whee. EV67 can calculate it directly. */ + unsigned long result; + __asm__("cttz %1,%0" : "=r"(result) : "r"(word)); + return result; +#else + unsigned long bits, qofs, bofs; + + __asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word)); + qofs = ffz_b(bits); + bits = __kernel_extbl(word, qofs); + bofs = ffz_b(~bits); + + return qofs*8 + bofs; +#endif +} + #ifdef __KERNEL__ /* * ffs: find first bit set. This is defined the same way as * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). + * differs in spirit from the above __ffs. */ static inline int ffs(int word) { - int result = ffz(~word); + int result = __ffs(word); return word ? result+1 : 0; } @@ -316,6 +353,14 @@ #define hweight16(x) hweight64((x) & 0xfffful) #define hweight8(x) hweight64((x) & 0xfful) #else +static inline unsigned long hweight64(unsigned long w) +{ + unsigned long result; + for (result = 0; w ; w >>= 1) + result += (w & 1); + return result; +} + #define hweight32(x) generic_hweight32(x) #define hweight16(x) generic_hweight16(x) #define hweight8(x) generic_hweight8(x) @@ -369,9 +414,32 @@ */ #define find_first_zero_bit(addr, size) \ find_next_zero_bit((addr), (size), 0) +#define find_first_bit(addr, size) \ + find_next_bit((addr), (size), 0) #ifdef __KERNEL__ +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is set. + */ +static inline unsigned long +sched_find_first_bit(unsigned long b[3]) +{ + unsigned long b0 = b[0], b1 = b[1], b2 = b[2]; + unsigned long ofs; + + ofs = (b1 ? 64 : 128); + b1 = (b1 ? b1 : b2); + ofs = (b0 ? 0 : ofs); + b0 = (b0 ? b0 : b1); + + return __ffs(b0) + ofs; +} + + #define ext2_set_bit __test_and_set_bit #define ext2_clear_bit __test_and_clear_bit #define ext2_test_bit test_bit diff -Nur linux-2.4.33-imedia/include/asm-arm/bitops.h linux-2.4.33-imedia-patching/include/asm-arm/bitops.h --- linux-2.4.33-imedia/include/asm-arm/bitops.h 2003-08-25 14:44:43.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-arm/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -2,6 +2,8 @@ * Copyright 1995, Russell King. * Various bits and pieces copyrights include: * Linus Torvalds (test_bit). 
+ * Big endian support: Copyright 2001, Nicolas Pitre + * reworked by rmk. * * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). * @@ -17,81 +19,267 @@ #ifdef __KERNEL__ +#include + #define smp_mb__before_clear_bit() do { } while (0) #define smp_mb__after_clear_bit() do { } while (0) /* - * Function prototypes to keep gcc -Wall happy. + * These functions are the basis of our bit ops. + * First, the atomic bitops. + * + * The endian issue for these functions is handled by the macros below. */ -extern void set_bit(int nr, volatile void * addr); +static inline void +____atomic_set_bit_mask(unsigned int mask, volatile unsigned char *p) +{ + unsigned long flags; + + local_irq_save(flags); + *p |= mask; + local_irq_restore(flags); +} + +static inline void +____atomic_clear_bit_mask(unsigned int mask, volatile unsigned char *p) +{ + unsigned long flags; + + local_irq_save(flags); + *p &= ~mask; + local_irq_restore(flags); +} + +static inline void +____atomic_change_bit_mask(unsigned int mask, volatile unsigned char *p) +{ + unsigned long flags; + + local_irq_save(flags); + *p ^= mask; + local_irq_restore(flags); +} -static inline void __set_bit(int nr, volatile void *addr) +static inline int +____atomic_test_and_set_bit_mask(unsigned int mask, volatile unsigned char *p) { - ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7)); + unsigned long flags; + unsigned int res; + + local_irq_save(flags); + res = *p; + *p = res | mask; + local_irq_restore(flags); + + return res & mask; } -extern void clear_bit(int nr, volatile void * addr); +static inline int +____atomic_test_and_clear_bit_mask(unsigned int mask, volatile unsigned char *p) +{ + unsigned long flags; + unsigned int res; + + local_irq_save(flags); + res = *p; + *p = res & ~mask; + local_irq_restore(flags); + + return res & mask; +} -static inline void __clear_bit(int nr, volatile void *addr) +static inline int +____atomic_test_and_change_bit_mask(unsigned int mask, volatile unsigned char *p) { - ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7)); + unsigned long flags; + unsigned int res; + + local_irq_save(flags); + res = *p; + *p = res ^ mask; + local_irq_restore(flags); + + return res & mask; } -extern void change_bit(int nr, volatile void * addr); +/* + * Now the non-atomic variants. We let the compiler handle all optimisations + * for these. 
+ */ +static inline void ____nonatomic_set_bit(int nr, volatile void *p) +{ + ((unsigned char *) p)[nr >> 3] |= (1U << (nr & 7)); +} -static inline void __change_bit(int nr, volatile void *addr) +static inline void ____nonatomic_clear_bit(int nr, volatile void *p) { - ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7)); + ((unsigned char *) p)[nr >> 3] &= ~(1U << (nr & 7)); } -extern int test_and_set_bit(int nr, volatile void * addr); +static inline void ____nonatomic_change_bit(int nr, volatile void *p) +{ + ((unsigned char *) p)[nr >> 3] ^= (1U << (nr & 7)); +} -static inline int __test_and_set_bit(int nr, volatile void *addr) +static inline int ____nonatomic_test_and_set_bit(int nr, volatile void *p) { unsigned int mask = 1 << (nr & 7); unsigned int oldval; - oldval = ((unsigned char *) addr)[nr >> 3]; - ((unsigned char *) addr)[nr >> 3] = oldval | mask; + oldval = ((unsigned char *) p)[nr >> 3]; + ((unsigned char *) p)[nr >> 3] = oldval | mask; return oldval & mask; } -extern int test_and_clear_bit(int nr, volatile void * addr); - -static inline int __test_and_clear_bit(int nr, volatile void *addr) +static inline int ____nonatomic_test_and_clear_bit(int nr, volatile void *p) { unsigned int mask = 1 << (nr & 7); unsigned int oldval; - oldval = ((unsigned char *) addr)[nr >> 3]; - ((unsigned char *) addr)[nr >> 3] = oldval & ~mask; + oldval = ((unsigned char *) p)[nr >> 3]; + ((unsigned char *) p)[nr >> 3] = oldval & ~mask; return oldval & mask; } -extern int test_and_change_bit(int nr, volatile void * addr); - -static inline int __test_and_change_bit(int nr, volatile void *addr) +static inline int ____nonatomic_test_and_change_bit(int nr, volatile void *p) { unsigned int mask = 1 << (nr & 7); unsigned int oldval; - oldval = ((unsigned char *) addr)[nr >> 3]; - ((unsigned char *) addr)[nr >> 3] = oldval ^ mask; + oldval = ((unsigned char *) p)[nr >> 3]; + ((unsigned char *) p)[nr >> 3] = oldval ^ mask; return oldval & mask; } -extern int find_first_zero_bit(void * addr, unsigned size); -extern int find_next_zero_bit(void * addr, int size, int offset); - /* * This routine doesn't need to be atomic. */ -static inline int test_bit(int nr, const void * addr) +static inline int ____test_bit(int nr, const void * p) { - return (((unsigned char *) addr)[nr >> 3] >> (nr & 7)) & 1; + return (((unsigned char *) p)[nr >> 3] >> (nr & 7)) & 1; } /* + * A note about Endian-ness. + * ------------------------- + * + * When the ARM is put into big endian mode via CR15, the processor + * merely swaps the order of bytes within words, thus: + * + * ------------ physical data bus bits ----------- + * D31 ... D24 D23 ... D16 D15 ... D8 D7 ... D0 + * little byte 3 byte 2 byte 1 byte 0 + * big byte 0 byte 1 byte 2 byte 3 + * + * This means that reading a 32-bit word at address 0 returns the same + * value irrespective of the endian mode bit. + * + * Peripheral devices should be connected with the data bus reversed in + * "Big Endian" mode. ARM Application Note 61 is applicable, and is + * available from http://www.arm.com/. + * + * The following assumes that the data bus connectivity for big endian + * mode has been followed. + * + * Note that bit 0 is defined to be 32-bit word bit 0, not byte 0 bit 0. + */ + +/* + * Little endian assembly bitops. nr = 0 -> byte 0 bit 0. 
+ */ +extern void _set_bit_le(int nr, volatile void * p); +extern void _clear_bit_le(int nr, volatile void * p); +extern void _change_bit_le(int nr, volatile void * p); +extern int _test_and_set_bit_le(int nr, volatile void * p); +extern int _test_and_clear_bit_le(int nr, volatile void * p); +extern int _test_and_change_bit_le(int nr, volatile void * p); +extern int _find_first_zero_bit_le(void * p, unsigned size); +extern int _find_next_zero_bit_le(void * p, int size, int offset); + +/* + * Big endian assembly bitops. nr = 0 -> byte 3 bit 0. + */ +extern void _set_bit_be(int nr, volatile void * p); +extern void _clear_bit_be(int nr, volatile void * p); +extern void _change_bit_be(int nr, volatile void * p); +extern int _test_and_set_bit_be(int nr, volatile void * p); +extern int _test_and_clear_bit_be(int nr, volatile void * p); +extern int _test_and_change_bit_be(int nr, volatile void * p); +extern int _find_first_zero_bit_be(void * p, unsigned size); +extern int _find_next_zero_bit_be(void * p, int size, int offset); + + +/* + * The __* form of bitops are non-atomic and may be reordered. + */ +#define ATOMIC_BITOP_LE(name,nr,p) \ + (__builtin_constant_p(nr) ? \ + ____atomic_##name##_mask(1 << ((nr) & 7), \ + ((unsigned char *)(p)) + ((nr) >> 3)) : \ + _##name##_le(nr,p)) + +#define ATOMIC_BITOP_BE(name,nr,p) \ + (__builtin_constant_p(nr) ? \ + ____atomic_##name##_mask(1 << ((nr) & 7), \ + ((unsigned char *)(p)) + (((nr) >> 3) ^ 3)) : \ + _##name##_be(nr,p)) + +#define NONATOMIC_BITOP_LE(name,nr,p) \ + (____nonatomic_##name(nr, p)) + +#define NONATOMIC_BITOP_BE(name,nr,p) \ + (____nonatomic_##name(nr ^ 0x18, p)) + +#ifndef __ARMEB__ +/* + * These are the little endian, atomic definitions. + */ +#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p) +#define clear_bit(nr,p) ATOMIC_BITOP_LE(clear_bit,nr,p) +#define change_bit(nr,p) ATOMIC_BITOP_LE(change_bit,nr,p) +#define test_and_set_bit(nr,p) ATOMIC_BITOP_LE(test_and_set_bit,nr,p) +#define test_and_clear_bit(nr,p) ATOMIC_BITOP_LE(test_and_clear_bit,nr,p)+#define test_and_change_bit(nr,p) ATOMIC_BITOP_LE(test_and_change_bit,nr,p) +#define test_bit(nr,p) ____test_bit(nr,p) +#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off) + +/* + * These are the little endian, non-atomic definitions. + */ +#define __set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)+#define __clear_bit(nr,p) NONATOMIC_BITOP_LE(clear_bit,nr,p) +#define __change_bit(nr,p) NONATOMIC_BITOP_LE(change_bit,nr,p) +#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p) +#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p) +#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_LE(test_and_change_bit,nr,p) +#define __test_bit(nr,p) ____test_bit(nr,p) + +#else + +/* + * These are the big endian, atomic definitions. + */ +#define set_bit(nr,p) ATOMIC_BITOP_BE(set_bit,nr,p) +#define clear_bit(nr,p) ATOMIC_BITOP_BE(clear_bit,nr,p) +#define change_bit(nr,p) ATOMIC_BITOP_BE(change_bit,nr,p) +#define test_and_set_bit(nr,p) ATOMIC_BITOP_BE(test_and_set_bit,nr,p) +#define test_and_clear_bit(nr,p) ATOMIC_BITOP_BE(test_and_clear_bit,nr,p)+#define test_and_change_bit(nr,p) ATOMIC_BITOP_BE(test_and_change_bit,nr,p) +#define test_bit(nr,p) ____test_bit((nr) ^ 0x18, p) +#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz) +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off) + +/* + * These are the big endian, non-atomic definitions. 
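The ATOMIC_BITOP_LE/BE macros above choose an implementation at compile time: when the bit number is a compile-time constant, GCC's __builtin_constant_p() lets the mask and byte offset be folded and the inline IRQ-masking helpers are used; otherwise the out-of-line assembly routines (_set_bit_le() and friends) are called. A reduced user-space sketch of the same dispatch pattern, with inline_path()/asm_path() standing in for the two real implementations:

  #include <stdio.h>

  static void inline_path(unsigned int mask, unsigned char *p)
  {
          *p |= mask;
          puts("constant nr: inline mask form");
  }

  static void asm_path(int nr, void *p)
  {
          ((unsigned char *)p)[nr >> 3] |= 1U << (nr & 7);
          puts("variable nr: out-of-line routine");
  }

  /* Same shape as ATOMIC_BITOP_LE(): constant nr -> precomputed mask and
   * byte pointer, variable nr -> call the general routine. */
  #define SET_BIT_LE(nr, p)                                               \
          (__builtin_constant_p(nr) ?                                     \
                  inline_path(1U << ((nr) & 7),                           \
                              (unsigned char *)(p) + ((nr) >> 3)) :       \
                  asm_path(nr, p))

  int main(void)
  {
          unsigned char map[4] = { 0 };
          volatile int n = 9;     /* volatile so the compiler cannot fold it */

          SET_BIT_LE(5, map);     /* constant nr: inline path */
          SET_BIT_LE(n, map);     /* variable nr: general path */
          printf("map[0]=0x%02x map[1]=0x%02x\n", map[0], map[1]);
          return 0;
  }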
+ */ +#define __set_bit(nr,p) NONATOMIC_BITOP_BE(set_bit,nr,p)+#define __clear_bit(nr,p) NONATOMIC_BITOP_BE(clear_bit,nr,p) +#define __change_bit(nr,p) NONATOMIC_BITOP_BE(change_bit,nr,p) +#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_BE(test_and_set_bit,nr,p) +#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_BE(test_and_clear_bit,nr,p) +#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_BE(test_and_change_bit,nr,p) +#define __test_bit(nr,p) ____test_bit((nr) ^ 0x18, p) + +#endif + +/* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. */ @@ -110,6 +298,29 @@ } /* + * ffz = Find First Zero in word. Undefined if no zero exists, + * so code should check against ~0UL first.. + */ +static inline unsigned long __ffs(unsigned long word) +{ + int k; + + k = 31; + if (word & 0x0000ffff) { k -= 16; word <<= 16; } + if (word & 0x00ff0000) { k -= 8; word <<= 8; } + if (word & 0x0f000000) { k -= 4; word <<= 4; } + if (word & 0x30000000) { k -= 2; word <<= 2; } + if (word & 0x40000000) { k -= 1; } + return k; +} + +/* + * fls: find last bit set. + */ + +#define fls(x) generic_fls(x) + +/* * ffs: find first bit set. This is defined the same way as * the libc and compiler builtin ffs routines, therefore * differs in spirit from the above ffz (man ffs). @@ -118,6 +329,22 @@ #define ffs(x) generic_ffs(x) /* + * Find first bit set in a 168-bit bitmap, where the first + * 128 bits are unlikely to be set. + */ +static inline int sched_find_first_bit(unsigned long *b) +{ + unsigned long v; + unsigned int off; + + for (off = 0; v = b[off], off < 4; off++) { + if (unlikely(v)) + break; + } + return __ffs(v) + off * 32; +} + +/* * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word */ @@ -126,18 +353,25 @@ #define hweight16(x) generic_hweight16(x) #define hweight8(x) generic_hweight8(x) -#define ext2_set_bit test_and_set_bit -#define ext2_clear_bit test_and_clear_bit -#define ext2_test_bit test_bit -#define ext2_find_first_zero_bit find_first_zero_bit -#define ext2_find_next_zero_bit find_next_zero_bit - -/* Bitmap functions for the minix filesystem. */ -#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) -#define minix_set_bit(nr,addr) set_bit(nr,addr) -#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr) -#define minix_test_bit(nr,addr) test_bit(nr,addr) -#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) +/* + * Ext2 is defined to use little-endian byte ordering. + * These do not need to be atomic. + */ +#define ext2_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p) +#define ext2_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p) +#define ext2_test_bit(nr,p) __test_bit(nr,p) +#define ext2_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) +#define ext2_find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off) + +/* + * Minix is defined to use little-endian byte ordering. + * These do not need to be atomic. 
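The __ffs() added above returns the index of the least significant set bit (the comment over it repeats the ffz text, but the routine searches for a one, not a zero) by halving the search window: 16, 8, 4, 2, then 1 bits. The same algorithm, lifted into a stand-alone program and checked against every single-bit input (my_ffs() is just a local copy for the check):

  #include <assert.h>
  #include <stdio.h>

  /* Same window-narrowing algorithm as the __ffs() above. */
  static unsigned long my_ffs(unsigned long word)
  {
          int k = 31;

          if (word & 0x0000ffff) { k -= 16; word <<= 16; }
          if (word & 0x00ff0000) { k -= 8;  word <<= 8;  }
          if (word & 0x0f000000) { k -= 4;  word <<= 4;  }
          if (word & 0x30000000) { k -= 2;  word <<= 2;  }
          if (word & 0x40000000) { k -= 1; }
          return k;
  }

  int main(void)
  {
          unsigned int bit;

          for (bit = 0; bit < 32; bit++)
                  assert(my_ffs(1UL << bit) == bit);

          /* lowest set bit of 0x00400100 is bit 8 */
          printf("__ffs(0x%08lx) = %lu\n", 0x00400100UL, my_ffs(0x00400100UL));
          return 0;
  }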
+ */ +#define minix_set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p) +#define minix_test_bit(nr,p) __test_bit(nr,p) +#define minix_test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p) +#define minix_test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p) +#define minix_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) #endif /* __KERNEL__ */ diff -Nur linux-2.4.33-imedia/include/asm-cris/bitops.h linux-2.4.33-imedia-patching/include/asm-cris/bitops.h --- linux-2.4.33-imedia/include/asm-cris/bitops.h 2004-02-18 15:36:32.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-cris/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -22,6 +22,7 @@ /* We use generic_ffs so get it; include guards resolve the possible mutually inclusion. */ #include +#include /* * Some hacks to defeat gcc over-optimizations.. @@ -375,7 +376,45 @@ #define minix_test_bit(nr,addr) test_bit(nr,addr) #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) -#endif /* __KERNEL__ */ +#if 0 +/* TODO: see below */ +#define sched_find_first_zero_bit(addr) find_first_zero_bit(addr, 168) + +#else +/* TODO: left out pending where to put it.. (there are .h dependencies) */ + + /* + * Every architecture must define this function. It's the fastest + * way of searching a 168-bit bitmap where the first 128 bits are + * unlikely to be set. It's guaranteed that at least one of the 168 + * bits is cleared. + */ +#if 0 +#if MAX_RT_PRIO != 128 || MAX_PRIO != 168 +# error update this function. +#endif +#else +#define MAX_RT_PRIO 128 +#define MAX_PRIO 168 +#endif + +static inline int sched_find_first_zero_bit(char *bitmap) +{ + unsigned int *b = (unsigned int *)bitmap; + unsigned int rt; + + rt = b[0] & b[1] & b[2] & b[3]; + if (unlikely(rt != 0xffffffff)) + return find_first_zero_bit(bitmap, MAX_RT_PRIO); + + if (b[4] != ~0) + return ffz(b[4]) + MAX_RT_PRIO; + return ffz(b[5]) + 32 + MAX_RT_PRIO; +} +#undef MAX_PRIO +#undef MAX_RT_PRIO +#endif +#endif /* __KERNEL__ */ #endif /* _CRIS_BITOPS_H */ diff -Nur linux-2.4.33-imedia/include/asm-generic/bitops.h linux-2.4.33-imedia-patching/include/asm-generic/bitops.h --- linux-2.4.33-imedia/include/asm-generic/bitops.h 2000-11-28 03:47:38.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-generic/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -51,6 +51,12 @@ return ((mask & *addr) != 0); } +/* + * fls: find last bit set. + */ + +#define fls(x) generic_fls(x) + #ifdef __KERNEL__ /* diff -Nur linux-2.4.33-imedia/include/asm-i386/bitops.h linux-2.4.33-imedia-patching/include/asm-i386/bitops.h --- linux-2.4.33-imedia/include/asm-i386/bitops.h 2006-01-11 19:27:16.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -6,6 +6,7 @@ */ #include +#include /* * These have to be done with inline assembly: that way the bit-setting @@ -75,6 +76,14 @@ :"=m" (ADDR) :"Ir" (nr)); } + +static __inline__ void __clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} #define smp_mb__before_clear_bit() barrier() #define smp_mb__after_clear_bit() barrier() @@ -283,6 +292,34 @@ } /** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first set bit, not the number of the byte + * containing a bit. 
+ */ +static __inline__ int find_first_bit(void * addr, unsigned size) +{ + int d0, d1; + int res; + + /* This looks at memory. Mark it volatile to tell gcc not to move it around */ + __asm__ __volatile__( + "xorl %%eax,%%eax\n\t" + "repe; scasl\n\t" + "jz 1f\n\t" + "leal -4(%%edi),%%edi\n\t" + "bsfl (%%edi),%%eax\n" + "1:\tsubl %%ebx,%%edi\n\t" + "shll $3,%%edi\n\t" + "addl %%edi,%%eax" + :"=a" (res), "=&c" (d0), "=&D" (d1) + :"1" ((size + 31) >> 5), "2" (addr), "b" (addr)); + return res; +} + +/** * find_next_zero_bit - find the first zero bit in a memory region * @addr: The address to base the search on * @offset: The bitnumber to start searching at @@ -295,7 +332,7 @@ if (bit) { /* - * Look for zero in first byte + * Look for zero in first 32 bits. */ __asm__("bsfl %1,%0\n\t" "jne 1f\n\t" @@ -316,6 +353,39 @@ } /** + * find_next_bit - find the first set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +static __inline__ int find_next_bit (void * addr, int size, int offset) +{ + unsigned long * p = ((unsigned long *) addr) + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for nonzero in the first 32 bits: + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (*p >> bit)); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No set bit yet, search remaining full words for a bit + */ + res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr)); + return (offset + set + res); +} + +/** * ffz - find first zero in word. * @word: The word to search * @@ -329,8 +399,41 @@ return word; } +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __inline__ unsigned long __ffs(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + #ifdef __KERNEL__ +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is cleared. + */ +static inline int _sched_find_first_bit(unsigned long *b) +{ + if (unlikely(b[0])) + return __ffs(b[0]); + if (unlikely(b[1])) + return __ffs(b[1]) + 32; + if (unlikely(b[2])) + return __ffs(b[2]) + 64; + if (b[3]) + return __ffs(b[3]) + 96; + return __ffs(b[4]) + 128; +} + /** * ffs - find first bit set * @x: the word to search diff -Nur linux-2.4.33-imedia/include/asm-i386/desc.h linux-2.4.33-imedia-patching/include/asm-i386/desc.h --- linux-2.4.33-imedia/include/asm-i386/desc.h 2004-02-18 15:36:32.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/desc.h 2006-01-26 15:19:42.000000000 +0200 @@ -71,9 +71,12 @@ static inline void clear_LDT(void) { - int cpu = smp_processor_id(); + int cpu; + preempt_disable(); + cpu = smp_processor_id(); set_ldt_desc(cpu, &default_ldt[0], 5); __load_LDT(cpu); + preempt_enable(); } /* diff -Nur linux-2.4.33-imedia/include/asm-i386/hardirq.h linux-2.4.33-imedia-patching/include/asm-i386/hardirq.h --- linux-2.4.33-imedia/include/asm-i386/hardirq.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/hardirq.h 2006-01-26 15:19:42.000000000 +0200 @@ -19,12 +19,16 @@ /* * Are we in an interrupt context? Either doing bottom half - * or hardware interrupt processing? 
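_sched_find_first_bit() above is the O(1) scheduler's priority lookup: one bit per priority level, 140 levels held in five 32-bit words, with the first 100 levels (the real-time range) usually empty. A user-space model of how such a lookup is used (the array layout and 32-bit word size are assumptions of the sketch, and bsf32() is a portable stand-in for __ffs()):

  #include <stdio.h>
  #include <stdint.h>

  #define NR_PRIO 140

  static int bsf32(uint32_t w)            /* index of lowest set bit, w != 0 */
  {
          int i = 0;

          while (!(w & 1)) { w >>= 1; i++; }
          return i;
  }

  static int sched_find_first_bit(const uint32_t *b)
  {
          /* Same shape as the x86 version above: test word by word,
           * expecting the low-priority (real-time) words to be empty. */
          if (b[0]) return bsf32(b[0]);
          if (b[1]) return bsf32(b[1]) + 32;
          if (b[2]) return bsf32(b[2]) + 64;
          if (b[3]) return bsf32(b[3]) + 96;
          return bsf32(b[4]) + 128;
  }

  int main(void)
  {
          uint32_t bitmap[5] = { 0 };

          bitmap[120 / 32] |= 1U << (120 % 32);   /* one runnable task at prio 120 */
          printf("highest-priority runnable level: %d\n",
                 sched_find_first_bit(bitmap));
          return 0;
  }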
+ * or hardware interrupt processing? Note the preempt check, + * this is both a bugfix and an optimization. If we are + * preemptible, we cannot be in an interrupt. */ -#define in_interrupt() ({ int __cpu = smp_processor_id(); \ - (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) +#define in_interrupt() (preempt_is_disabled() && \ + ({unsigned long __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })) -#define in_irq() (local_irq_count(smp_processor_id()) != 0) +#define in_irq() (preempt_is_disabled() && \ + (local_irq_count(smp_processor_id()) != 0)) #ifndef CONFIG_SMP @@ -36,6 +40,8 @@ #define synchronize_irq() barrier() +#define release_irqlock(cpu) do { } while (0) + #else #include diff -Nur linux-2.4.33-imedia/include/asm-i386/highmem.h linux-2.4.33-imedia-patching/include/asm-i386/highmem.h --- linux-2.4.33-imedia/include/asm-i386/highmem.h 2006-01-11 19:27:18.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/highmem.h 2006-01-26 15:19:42.000000000 +0200 @@ -91,6 +91,7 @@ enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -112,8 +113,10 @@ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) // FIXME + if (vaddr < FIXADDR_START) { // FIXME + preempt_enable(); return; + } if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) out_of_line_bug(); @@ -125,6 +128,8 @@ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); #endif + + preempt_enable(); } #endif /* __KERNEL__ */ diff -Nur linux-2.4.33-imedia/include/asm-i386/hw_irq.h linux-2.4.33-imedia-patching/include/asm-i386/hw_irq.h --- linux-2.4.33-imedia/include/asm-i386/hw_irq.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/hw_irq.h 2006-01-26 15:19:42.000000000 +0200 @@ -95,6 +95,18 @@ #define __STR(x) #x #define STR(x) __STR(x) +#define GET_CURRENT \ + "movl %esp, %ebx\n\t" \ + "andl $-8192, %ebx\n\t" + +#ifdef CONFIG_PREEMPT +#define BUMP_LOCK_COUNT \ + GET_CURRENT \ + "incl 4(%ebx)\n\t" +#else +#define BUMP_LOCK_COUNT +#endif + #define SAVE_ALL \ "cld\n\t" \ "pushl %es\n\t" \ @@ -108,15 +120,12 @@ "pushl %ebx\n\t" \ "movl $" STR(__KERNEL_DS) ",%edx\n\t" \ "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" + "movl %edx,%es\n\t" \ + BUMP_LOCK_COUNT #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) -#define GET_CURRENT \ - "movl %esp, %ebx\n\t" \ - "andl $-8192, %ebx\n\t" - /* * SMP has a few special interrupts for IPI messages */ diff -Nur linux-2.4.33-imedia/include/asm-i386/i387.h linux-2.4.33-imedia-patching/include/asm-i386/i387.h --- linux-2.4.33-imedia/include/asm-i386/i387.h 2006-01-11 19:31:56.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/i387.h 2006-01-26 15:19:42.000000000 +0200 @@ -12,6 +12,7 @@ #define __ASM_I386_I387_H #include +#include #include #include #include @@ -24,7 +25,7 @@ extern void restore_fpu( struct task_struct *tsk ); extern void kernel_fpu_begin(void); -#define kernel_fpu_end() stts() +#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) #define unlazy_fpu( tsk ) do { \ diff -Nur linux-2.4.33-imedia/include/asm-i386/mmu_context.h linux-2.4.33-imedia-patching/include/asm-i386/mmu_context.h --- linux-2.4.33-imedia/include/asm-i386/mmu_context.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/mmu_context.h 2006-01-26 15:19:42.000000000 +0200 @@ 
-29,7 +29,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) { - if (prev != next) { + if (likely(prev != next)) { /* stop flush ipis for the previous mm */ clear_bit(cpu, &prev->cpu_vm_mask); #ifdef CONFIG_SMP diff -Nur linux-2.4.33-imedia/include/asm-i386/pgalloc.h linux-2.4.33-imedia-patching/include/asm-i386/pgalloc.h --- linux-2.4.33-imedia/include/asm-i386/pgalloc.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/pgalloc.h 2006-01-26 15:19:42.000000000 +0200 @@ -75,20 +75,26 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else + preempt_enable(); + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } static inline void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static inline void free_pgd_slow(pgd_t *pgd) @@ -119,19 +125,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = (unsigned long *)pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)(*ret); ret[0] = ret[1]; pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } static inline void pte_free_fast(pte_t *pte) { + preempt_disable(); *(unsigned long *)pte = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } static __inline__ void pte_free_slow(pte_t *pte) diff -Nur linux-2.4.33-imedia/include/asm-i386/smp.h linux-2.4.33-imedia-patching/include/asm-i386/smp.h --- linux-2.4.33-imedia/include/asm-i386/smp.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/smp.h 2006-01-26 15:19:42.000000000 +0200 @@ -40,6 +40,7 @@ extern void smp_flush_tlb(void); extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); extern void fastcall smp_send_reschedule(int cpu); +extern void fastcall smp_send_reschedule_all(void); extern void smp_invalidate_rcv(void); /* Process an NMI */ extern void (*mtrr_hook) (void); extern void zap_low_mappings (void); @@ -81,7 +82,7 @@ * so this is correct in the x86 case. */ -#define smp_processor_id() (current->processor) +#define smp_processor_id() (current->cpu) static __inline int hard_smp_processor_id(void) { @@ -99,17 +100,5 @@ #define NO_PROC_ID 0xFF /* No processor magic marker */ -/* - * This magic constant controls our willingness to transfer - * a process across CPUs. Such a transfer incurs misses on the L1 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My - * gut feeling is this will vary by board in value. For a board - * with separate L2 cache it probably depends also on the RSS, and - * for a board with shared L2 cache it ought to decay fast as other - * processes are run. 
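The pgalloc changes above bracket the pgd/pte quicklists with preempt_disable()/preempt_enable(): the quicklists are per-CPU, so a task preempted between loading the list head and storing the new one could resume on another CPU and corrupt both lists. A reduced user-space model of the protected pattern (the preempt_* macros here are no-op stand-ins for the real per-task counter operations):

  #include <stdio.h>

  /* Stand-ins for the kernel primitives; in the kernel these raise and
   * lower the preempt counter so the scheduler cannot switch tasks while
   * the per-CPU list head is being read and rewritten. */
  #define preempt_disable()  do { } while (0)
  #define preempt_enable()   do { } while (0)

  struct page_chunk {
          struct page_chunk *next;
  };

  static struct page_chunk *quicklist;    /* models one CPU's pgd_quicklist */

  static void free_fast(struct page_chunk *p)
  {
          preempt_disable();              /* RMW of the per-CPU list head */
          p->next = quicklist;
          quicklist = p;
          preempt_enable();
  }

  static struct page_chunk *alloc_fast(void)
  {
          struct page_chunk *p;

          preempt_disable();
          p = quicklist;
          if (p)
                  quicklist = p->next;
          preempt_enable();
          return p;
  }

  int main(void)
  {
          struct page_chunk a, b;

          free_fast(&a);
          free_fast(&b);
          printf("popped %p then %p\n", (void *)alloc_fast(), (void *)alloc_fast());
          return 0;
  }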
- */ - -#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ - #endif #endif diff -Nur linux-2.4.33-imedia/include/asm-i386/smplock.h linux-2.4.33-imedia-patching/include/asm-i386/smplock.h --- linux-2.4.33-imedia/include/asm-i386/smplock.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/smplock.h 2006-01-26 15:19:42.000000000 +0200 @@ -14,7 +14,15 @@ extern spinlock_cacheline_t kernel_flag_cacheline; #define kernel_flag kernel_flag_cacheline.lock +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_get_count() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -46,6 +54,11 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else #if 1 if (!++current->lock_depth) spin_lock(&kernel_flag); @@ -58,6 +71,7 @@ :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); #endif +#endif } static __inline__ void unlock_kernel(void) diff -Nur linux-2.4.33-imedia/include/asm-i386/softirq.h linux-2.4.33-imedia-patching/include/asm-i386/softirq.h --- linux-2.4.33-imedia/include/asm-i386/softirq.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/softirq.h 2006-01-26 15:19:42.000000000 +0200 @@ -5,14 +5,15 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) -#define in_softirq() (local_bh_count(smp_processor_id()) != 0) +#define in_softirq() ( preempt_is_disabled() & \ + (local_bh_count(smp_processor_id()) != 0)) /* * NOTE: this assembly code assumes: @@ -22,7 +23,7 @@ * If you change the offsets in irq_stat then you have to * update this code as well. 
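Under CONFIG_PREEMPT, lock_kernel() above takes the BKL spinlock only on the -1 to 0 transition of current->lock_depth and bumps the depth afterwards, so the depth counter never claims a lock that has not actually been taken yet, which matters once lock_kernel() itself can be preempted. A stand-alone model of the depth accounting (plain globals stand in for the spinlock and the task field; unlock_kernel() follows the existing kernel logic):

  #include <assert.h>
  #include <stdio.h>

  static int bkl_held;                    /* models spin_is_locked(&kernel_flag) */
  static int lock_depth = -1;             /* models current->lock_depth */

  #define spin_lock(l)    (*(l) = 1)      /* stand-ins for the real spinlock ops */
  #define spin_unlock(l)  (*(l) = 0)

  static void lock_kernel(void)
  {
          if (lock_depth == -1)           /* outermost entry: really take the lock */
                  spin_lock(&bkl_held);
          ++lock_depth;                   /* depth bumped only after the lock is held */
  }

  static void unlock_kernel(void)
  {
          if (--lock_depth < 0)           /* innermost exit: really drop the lock */
                  spin_unlock(&bkl_held);
  }

  int main(void)
  {
          lock_kernel();          /* depth -1 -> 0, lock taken */
          lock_kernel();          /* nested: depth 0 -> 1, lock already held */
          assert(bkl_held);
          unlock_kernel();        /* depth 1 -> 0, still held */
          assert(bkl_held);
          unlock_kernel();        /* depth 0 -> -1, released */
          assert(!bkl_held);
          puts("BKL depth accounting ok");
          return 0;
  }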
*/ -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ \ @@ -45,4 +46,6 @@ /* no registers clobbered */ ); \ } while (0) +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) + #endif /* __ASM_SOFTIRQ_H */ diff -Nur linux-2.4.33-imedia/include/asm-i386/spinlock.h linux-2.4.33-imedia-patching/include/asm-i386/spinlock.h --- linux-2.4.33-imedia/include/asm-i386/spinlock.h 2006-01-11 19:27:16.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/spinlock.h 2006-01-26 15:19:42.000000000 +0200 @@ -77,7 +77,7 @@ :"=m" (lock->lock) : : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { #if SPINLOCK_DEBUG if (lock->magic != SPINLOCK_MAGIC) @@ -97,7 +97,7 @@ :"=q" (oldval), "=m" (lock->lock) \ :"0" (oldval) : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { char oldval = 1; #if SPINLOCK_DEBUG @@ -113,7 +113,7 @@ #endif -static inline int spin_trylock(spinlock_t *lock) +static inline int _raw_spin_trylock(spinlock_t *lock) { char oldval; __asm__ __volatile__( @@ -123,7 +123,7 @@ return oldval > 0; } -static inline void spin_lock(spinlock_t *lock) +static inline void _raw_spin_lock(spinlock_t *lock) { #if SPINLOCK_DEBUG __label__ here; @@ -179,7 +179,7 @@ */ /* the spinlock helpers are in arch/i386/kernel/semaphore.c */ -static inline void read_lock(rwlock_t *rw) +static inline void _raw_read_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -188,7 +188,7 @@ __build_read_lock(rw, "__read_lock_failed"); } -static inline void write_lock(rwlock_t *rw) +static inline void _raw_write_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -197,10 +197,10 @@ __build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") +#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") -static inline int write_trylock(rwlock_t *lock) +static inline int _raw_write_trylock(rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff -Nur linux-2.4.33-imedia/include/asm-i386/system.h linux-2.4.33-imedia-patching/include/asm-i386/system.h --- linux-2.4.33-imedia/include/asm-i386/system.h 2006-01-11 19:27:16.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-i386/system.h 2006-01-26 15:19:42.000000000 +0200 @@ -12,25 +12,22 @@ struct task_struct; /* one of the stranger aspects of C forward declarations.. 
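Renaming the i386 primitives to _raw_spin_* above frees the spin_lock()/spin_unlock() names for generic wrappers, presumably supplied in include/linux/spinlock.h elsewhere in this patch, that bracket the raw operation with preemption control. A compilable sketch of that intended layering (all helpers here are simplified stand-ins, not the real implementations):

  #include <stdio.h>

  static int preempt_count;               /* models the per-task preempt counter */
  #define preempt_disable()  (preempt_count++)
  #define preempt_enable()   (preempt_count--)

  typedef struct { volatile int locked; } spinlock_t;

  static void _raw_spin_lock(spinlock_t *l)   { l->locked = 1; }
  static void _raw_spin_unlock(spinlock_t *l) { l->locked = 0; }

  /* Rough shape of the generic wrappers layered on top of the raw ops:
   * preemption stays off for as long as the lock is held. */
  #define spin_lock(l)    do { preempt_disable(); _raw_spin_lock(l); } while (0)
  #define spin_unlock(l)  do { _raw_spin_unlock(l); preempt_enable(); } while (0)

  int main(void)
  {
          spinlock_t lock = { 0 };

          spin_lock(&lock);
          printf("held: locked=%d preempt_count=%d\n", lock.locked, preempt_count);
          spin_unlock(&lock);
          printf("released: locked=%d preempt_count=%d\n", lock.locked, preempt_count);
          return 0;
  }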
*/ extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); -#define prepare_to_switch() do { } while(0) #define switch_to(prev,next,last) do { \ asm volatile("pushl %%esi\n\t" \ "pushl %%edi\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ - "movl %3,%%esp\n\t" /* restore ESP */ \ + "movl %2,%%esp\n\t" /* restore ESP */ \ "movl $1f,%1\n\t" /* save EIP */ \ - "pushl %4\n\t" /* restore EIP */ \ + "pushl %3\n\t" /* restore EIP */ \ "jmp __switch_to\n" \ "1:\t" \ "popl %%ebp\n\t" \ "popl %%edi\n\t" \ "popl %%esi\n\t" \ - :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ - "=b" (last) \ + :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \ :"m" (next->thread.esp),"m" (next->thread.eip), \ - "a" (prev), "d" (next), \ - "b" (prev)); \ + "a" (prev), "d" (next)); \ } while (0) #define _set_base(addr,base) do { unsigned long __pr; \ @@ -323,6 +320,13 @@ #define __save_and_cli(x) do { __save_flags(x); __cli(); } while(0); #define __save_and_sti(x) do { __save_flags(x); __sti(); } while(0); +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + __save_flags(flags); \ + !(flags & (1<<9)); \ +}) + /* For spinlocks etc */ #if 0 #define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") diff -Nur linux-2.4.33-imedia/include/asm-ia64/bitops.h linux-2.4.33-imedia-patching/include/asm-ia64/bitops.h --- linux-2.4.33-imedia/include/asm-ia64/bitops.h 2003-11-28 20:26:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-ia64/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -4,6 +4,9 @@ /* * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang + * + * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1) + * scheduler patch */ #include @@ -91,6 +94,17 @@ } /** + * __clear_bit - Clears a bit in memory (non-atomic version) + */ +static __inline__ void +__clear_bit (int nr, volatile void *addr) +{ + volatile __u32 *p = (__u32 *) addr + (nr >> 5); + __u32 m = 1 << (nr & 31); + *p &= ~m; +} + +/** * change_bit - Toggle a bit in memory * @nr: Bit to clear * @addr: Address to start counting from @@ -266,12 +280,11 @@ } /** - * ffz - find the first zero bit in a memory region - * @x: The address to start the search at + * ffz - find the first zero bit in a long word + * @x: The long word to find the bit in * - * Returns the bit-number (0..63) of the first (least significant) zero bit, not - * the number of the byte containing a bit. Undefined if no zero exists, so - * code should check against ~0UL first... + * Returns the bit-number (0..63) of the first (least significant) zero bit. Undefined if + * no zero exists, so code should check against ~0UL first... */ static inline unsigned long ffz (unsigned long x) @@ -297,6 +310,21 @@ return result; } +/** + * __ffs - find first bit in word. + * @x: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __inline__ unsigned long +__ffs (unsigned long x) +{ + unsigned long result; + + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" ((x - 1) & ~x)); + return result; +} + #ifdef __KERNEL__ /* @@ -313,6 +341,12 @@ return exp - 0xffff; } +static int +fls (int x) +{ + return ia64_fls((unsigned int) x); +} + /* * ffs: find first bit set. 
This is defined the same way as the libc and compiler builtin * ffs routines, therefore differs in spirit from the above ffz (man ffs): it operates on @@ -385,8 +419,53 @@ */ #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +/* + * Find next bit in a bitmap reasonably efficiently.. + */ +static inline int +find_next_bit (void *addr, unsigned long size, unsigned long offset) +{ + unsigned long *p = ((unsigned long *) addr) + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp &= ~0UL << offset; + if (size < 64) + goto found_first; + if (tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if ((tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; + found_first: + tmp &= ~0UL >> (64-size); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ + found_middle: + return result + __ffs(tmp); +} + +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) + #ifdef __KERNEL__ +#define __clear_bit(nr, addr) clear_bit(nr, addr) + #define ext2_set_bit test_and_set_bit #define ext2_clear_bit test_and_clear_bit #define ext2_test_bit test_bit @@ -400,6 +479,16 @@ #define minix_test_bit(nr,addr) test_bit(nr,addr) #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) +static inline int +sched_find_first_bit (unsigned long *b) +{ + if (unlikely(b[0])) + return __ffs(b[0]); + if (unlikely(b[1])) + return 64 + __ffs(b[1]); + return __ffs(b[2]) + 128; +} + #endif /* __KERNEL__ */ #endif /* _ASM_IA64_BITOPS_H */ diff -Nur linux-2.4.33-imedia/include/asm-m68k/bitops.h linux-2.4.33-imedia-patching/include/asm-m68k/bitops.h --- linux-2.4.33-imedia/include/asm-m68k/bitops.h 2004-02-18 15:36:32.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-m68k/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -97,6 +97,7 @@ (__builtin_constant_p(nr) ? \ __constant_clear_bit(nr, vaddr) : \ __generic_clear_bit(nr, vaddr)) +#define __clear_bit(nr,vaddr) clear_bit(nr,vaddr) static inline void __constant_clear_bit(int nr, volatile void *vaddr) { @@ -238,6 +239,28 @@ return 32 - cnt; } +#define __ffs(x) (ffs(x) - 1) + + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is cleared. + */ +static inline int sched_find_first_bit(unsigned long *b) +{ + if (unlikely(b[0])) + return __ffs(b[0]); + if (unlikely(b[1])) + return __ffs(b[1]) + 32; + if (unlikely(b[2])) + return __ffs(b[2]) + 64; + if (b[3]) + return __ffs(b[3]) + 96; + return __ffs(b[4]) + 128; +} + /* * hweightN: returns the hamming weight (i.e. the number diff -Nur linux-2.4.33-imedia/include/asm-mips/bitops.h linux-2.4.33-imedia-patching/include/asm-mips/bitops.h --- linux-2.4.33-imedia/include/asm-mips/bitops.h 2004-02-18 15:36:32.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-mips/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -51,6 +51,8 @@ #ifdef CONFIG_CPU_HAS_LLSC +#include + /* * These functions for MIPS ISA > 1 are interrupt and SMP proof and * interrupt friendly @@ -593,21 +595,30 @@ * * Undefined if no zero exists, so code should check against ~0UL first. 
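Several of the ports above lean on simple identities between these helpers: ffz(x) equals __ffs(~x), and the m68k port defines __ffs(x) as ffs(x) - 1, ffs() being the 1-based libc-style routine. A quick stand-alone check of both identities (my_ffs0() is a portable reference, not kernel code):

  #include <assert.h>
  #include <stdio.h>
  #include <strings.h>            /* ffs() */

  /* Portable 0-based "lowest set bit" used as the reference. */
  static unsigned int my_ffs0(unsigned long x)
  {
          unsigned int i = 0;

          while (!(x & 1UL)) { x >>= 1; i++; }
          return i;
  }

  int main(void)
  {
          unsigned long x = 0xffffff0fUL;  /* arbitrary test word, not all ones */

          /* ffz(x) finds the lowest clear bit, i.e. the lowest set bit of ~x. */
          assert(my_ffs0(~x) == 4);

          /* libc ffs() is 1-based, so __ffs(x) == ffs(x) - 1 for x != 0. */
          assert((unsigned int)ffs(0x90) - 1 == my_ffs0(0x90));

          printf("ffz(0x%lx) = %u, __ffs(0x90) = %u\n",
                 x, my_ffs0(~x), my_ffs0(0x90));
          return 0;
  }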
*/ -static __inline__ unsigned long ffz(unsigned long word) +extern __inline__ unsigned long ffz(unsigned long word) { - int b = 0, s; + unsigned int __res; + unsigned int mask = 1; - word = ~word; - s = 16; if (word << 16 != 0) s = 0; b += s; word >>= s; - s = 8; if (word << 24 != 0) s = 0; b += s; word >>= s; - s = 4; if (word << 28 != 0) s = 0; b += s; word >>= s; - s = 2; if (word << 30 != 0) s = 0; b += s; word >>= s; - s = 1; if (word << 31 != 0) s = 0; b += s; + __asm__ ( + ".set\tnoreorder\n\t" + ".set\tnoat\n\t" + "move\t%0,$0\n" + "1:\tand\t$1,%2,%1\n\t" + "beqz\t$1,2f\n\t" + "sll\t%1,1\n\t" + "bnez\t%1,1b\n\t" + "addiu\t%0,1\n\t" + ".set\tat\n\t" + ".set\treorder\n" + "2:\n\t" + : "=&r" (__res), "=r" (mask) + : "r" (word), "1" (mask) + : "$1"); - return b; + return __res; } - #ifdef __KERNEL__ /* diff -Nur linux-2.4.33-imedia/include/asm-mips/smplock.h linux-2.4.33-imedia-patching/include/asm-mips/smplock.h --- linux-2.4.33-imedia/include/asm-mips/smplock.h 2004-02-18 15:36:32.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-mips/smplock.h 2006-01-26 15:19:42.000000000 +0200 @@ -8,12 +8,21 @@ #ifndef __ASM_SMPLOCK_H #define __ASM_SMPLOCK_H +#include #include #include extern spinlock_t kernel_flag; +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_get_count() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -45,8 +54,14 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else if (!++current->lock_depth) spin_lock(&kernel_flag); +#endif } static __inline__ void unlock_kernel(void) diff -Nur linux-2.4.33-imedia/include/asm-mips/softirq.h linux-2.4.33-imedia-patching/include/asm-mips/softirq.h --- linux-2.4.33-imedia/include/asm-mips/softirq.h 2002-11-29 01:53:15.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-mips/softirq.h 2006-01-26 15:19:42.000000000 +0200 @@ -15,6 +15,7 @@ static inline void cpu_bh_disable(int cpu) { + preempt_disable(); local_bh_count(cpu)++; barrier(); } @@ -23,6 +24,7 @@ { barrier(); local_bh_count(cpu)--; + preempt_enable(); } @@ -36,6 +38,7 @@ cpu = smp_processor_id(); \ if (!--local_bh_count(cpu) && softirq_pending(cpu)) \ do_softirq(); \ + preempt_enable(); \ } while (0) #define in_softirq() (local_bh_count(smp_processor_id()) != 0) diff -Nur linux-2.4.33-imedia/include/asm-mips/system.h linux-2.4.33-imedia-patching/include/asm-mips/system.h --- linux-2.4.33-imedia/include/asm-mips/system.h 2004-02-18 15:36:32.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-mips/system.h 2006-01-26 15:19:42.000000000 +0200 @@ -333,4 +333,18 @@ #define die_if_kernel(msg, regs) \ __die_if_kernel(msg, regs, __FILE__ ":", __FUNCTION__, __LINE__) +extern __inline__ int intr_on(void) +{ + unsigned long flags; + save_flags(flags); + return flags & 1; +} + +extern __inline__ int intr_off(void) +{ + return ! 
intr_on(); +} + +#define irqs_disabled() intr_off() + #endif /* _ASM_SYSTEM_H */ diff -Nur linux-2.4.33-imedia/include/asm-ppc/bitops.h linux-2.4.33-imedia-patching/include/asm-ppc/bitops.h --- linux-2.4.33-imedia/include/asm-ppc/bitops.h 2003-08-25 14:44:44.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc/bitops.h 2006-01-26 15:19:42.000000000 +0200 @@ -7,9 +7,18 @@ #define _PPC_BITOPS_H #include +#include #include #include +#ifdef CONFIG_IBM405_ERR77 +#define PPC405_ERR77(ra,rb) dcbt ra, rb; +#define PPC405_ERR77_SYNC sync; +#else +#define PPC405_ERR77(ra,rb) +#define PPC405_ERR77_SYNC +#endif + /* * The test_and_*_bit operations are taken to imply a memory barrier * on SMP systems. @@ -26,7 +35,7 @@ * These used to be if'd out here because using : "cc" as a constraint * resulted in errors from egcs. Things appear to be OK with gcc-2.95. */ -static __inline__ void set_bit(int nr, volatile void * addr) +static __inline__ void set_bit(int nr, volatile unsigned long * addr) { unsigned long old; unsigned long mask = 1 << (nr & 0x1f); @@ -46,7 +55,7 @@ /* * non-atomic version */ -static __inline__ void __set_bit(int nr, volatile void *addr) +static __inline__ void __set_bit(int nr, volatile unsigned long *addr) { unsigned long mask = 1 << (nr & 0x1f); unsigned long *p = ((unsigned long *)addr) + (nr >> 5); @@ -60,7 +69,7 @@ #define smp_mb__before_clear_bit() smp_mb() #define smp_mb__after_clear_bit() smp_mb() -static __inline__ void clear_bit(int nr, volatile void *addr) +static __inline__ void clear_bit(int nr, volatile unsigned long *addr) { unsigned long old; unsigned long mask = 1 << (nr & 0x1f); @@ -80,7 +89,7 @@ /* * non-atomic version */ -static __inline__ void __clear_bit(int nr, volatile void *addr) +static __inline__ void __clear_bit(int nr, volatile unsigned long *addr) { unsigned long mask = 1 << (nr & 0x1f); unsigned long *p = ((unsigned long *)addr) + (nr >> 5); @@ -88,7 +97,7 @@ *p &= ~mask; } -static __inline__ void change_bit(int nr, volatile void *addr) +static __inline__ void change_bit(int nr, volatile unsigned long *addr) { unsigned long old; unsigned long mask = 1 << (nr & 0x1f); @@ -108,7 +117,7 @@ /* * non-atomic version */ -static __inline__ void __change_bit(int nr, volatile void *addr) +static __inline__ void __change_bit(int nr, volatile unsigned long *addr) { unsigned long mask = 1 << (nr & 0x1f); unsigned long *p = ((unsigned long *)addr) + (nr >> 5); @@ -119,7 +128,7 @@ /* * test_and_*_bit do imply a memory barrier (?) 
*/ -static __inline__ int test_and_set_bit(int nr, volatile void *addr) +static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr) { unsigned int old, t; unsigned int mask = 1 << (nr & 0x1f); @@ -142,7 +151,7 @@ /* * non-atomic version */ -static __inline__ int __test_and_set_bit(int nr, volatile void *addr) +static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr) { unsigned long mask = 1 << (nr & 0x1f); unsigned long *p = ((unsigned long *)addr) + (nr >> 5); @@ -152,7 +161,7 @@ return (old & mask) != 0; } -static __inline__ int test_and_clear_bit(int nr, volatile void *addr) +static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr) { unsigned int old, t; unsigned int mask = 1 << (nr & 0x1f); @@ -175,7 +184,7 @@ /* * non-atomic version */ -static __inline__ int __test_and_clear_bit(int nr, volatile void *addr) +static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr) { unsigned long mask = 1 << (nr & 0x1f); unsigned long *p = ((unsigned long *)addr) + (nr >> 5); @@ -185,7 +194,7 @@ return (old & mask) != 0; } -static __inline__ int test_and_change_bit(int nr, volatile void *addr) +static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr) { unsigned int old, t; unsigned int mask = 1 << (nr & 0x1f); @@ -208,7 +217,7 @@ /* * non-atomic version */ -static __inline__ int __test_and_change_bit(int nr, volatile void *addr) +static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr) { unsigned long mask = 1 << (nr & 0x1f); unsigned long *p = ((unsigned long *)addr) + (nr >> 5); @@ -218,7 +227,7 @@ return (old & mask) != 0; } -static __inline__ int test_bit(int nr, __const__ volatile void *addr) +static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr) { __const__ unsigned int *p = (__const__ unsigned int *) addr; @@ -226,7 +235,7 @@ } /* Return the bit position of the most significant 1 bit in a word */ -static __inline__ int __ilog2(unsigned int x) +static __inline__ int __ilog2(unsigned long x) { int lz; @@ -234,13 +243,18 @@ return 31 - lz; } -static __inline__ int ffz(unsigned int x) +static __inline__ int ffz(unsigned long x) { if ((x = ~x) == 0) return 32; return __ilog2(x & -x); } +static inline int __ffs(unsigned long x) +{ + return __ilog2(x & -x); +} + /* * ffs: find first bit set. This is defined the same way as * the libc and compiler builtin ffs routines, therefore @@ -252,6 +266,18 @@ } /* + * fls: find last (most-significant) bit set. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. + */ +static __inline__ int fls(unsigned int x) +{ + int lz; + + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; +} + +/* * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word */ @@ -261,13 +287,95 @@ #define hweight8(x) generic_hweight8(x) /* + * Find the first bit set in a 140-bit bitmap. + * The first 100 bits are unlikely to be set. 
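The PowerPC __ffs() above uses the two's-complement identity x & -x, which isolates the lowest set bit, and then takes its log2 via cntlzw (__ilog2(y) = 31 - clz(y)). A portable check of that identity (clz32() stands in for the cntlzw instruction):

  #include <assert.h>
  #include <stdio.h>

  /* Portable count-leading-zeros for a non-zero 32-bit value. */
  static int clz32(unsigned int x)
  {
          int n = 0;

          while (!(x & 0x80000000u)) { x <<= 1; n++; }
          return n;
  }

  static int ilog2_32(unsigned int x) { return 31 - clz32(x); }
  static int ffs0_32(unsigned int x)  { return ilog2_32(x & -x); }  /* __ffs analogue */

  int main(void)
  {
          unsigned int bit;

          for (bit = 0; bit < 32; bit++)
                  assert(ffs0_32(1u << bit) == (int)bit);

          /* x & -x keeps only the lowest set bit: */
          printf("0x%08x & -0x%08x = 0x%08x, __ffs = %d\n",
                 0x00a04000u, 0x00a04000u, 0x00a04000u & -0x00a04000u,
                 ffs0_32(0x00a04000u));
          return 0;
  }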
+ */ +static inline int sched_find_first_bit(unsigned long *b) +{ + if (unlikely(b[0])) + return __ffs(b[0]); + if (unlikely(b[1])) + return __ffs(b[1]) + 32; + if (unlikely(b[2])) + return __ffs(b[2]) + 64; + if (b[3]) + return __ffs(b[3]) + 96; + return __ffs(b[4]) + 128; +} + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +static __inline__ unsigned long find_next_bit(unsigned long *addr, + unsigned long size, unsigned long offset) +{ + unsigned int *p = ((unsigned int *) addr) + (offset >> 5); + unsigned int result = offset & ~31UL; + unsigned int tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 31UL; + if (offset) { + tmp = *p++; + tmp &= ~0UL << offset; + if (size < 32) + goto found_first; + if (tmp) + goto found_middle; + size -= 32; + result += 32; + } + while (size >= 32) { + if ((tmp = *p++) != 0) + goto found_middle; + result += 32; + size -= 32; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= ~0UL >> (32 - size); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} + +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first set bit, not the number of the byte + * containing a bit. + */ +#define find_first_bit(addr, size) \ + find_next_bit((addr), (size), 0) + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is cleared. + */ +#define _sched_find_first_bit(map) \ + find_first_bit(map, MAX_PRIO) + +/* * This implementation of find_{first,next}_zero_bit was stolen from * Linus' asm-alpha/bitops.h. */ #define find_first_zero_bit(addr, size) \ find_next_zero_bit((addr), (size), 0) -static __inline__ unsigned long find_next_zero_bit(void * addr, +static __inline__ unsigned long find_next_zero_bit(unsigned long * addr, unsigned long size, unsigned long offset) { unsigned int * p = ((unsigned int *) addr) + (offset >> 5); @@ -306,8 +414,8 @@ } -#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, addr) -#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, addr) +#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr)) +#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr)) static __inline__ int ext2_test_bit(int nr, __const__ void * addr) { diff -Nur linux-2.4.33-imedia/include/asm-ppc/dma.h linux-2.4.33-imedia-patching/include/asm-ppc/dma.h --- linux-2.4.33-imedia/include/asm-ppc/dma.h 2003-06-13 17:51:38.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc/dma.h 2006-01-26 15:19:42.000000000 +0200 @@ -11,6 +11,7 @@ #include #include #include +#include #include /* diff -Nur linux-2.4.33-imedia/include/asm-ppc/hardirq.h linux-2.4.33-imedia-patching/include/asm-ppc/hardirq.h --- linux-2.4.33-imedia/include/asm-ppc/hardirq.h 2003-08-25 14:44:44.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc/hardirq.h 2006-01-26 15:19:42.000000000 +0200 @@ -31,10 +31,12 @@ * Are we in an interrupt context? Either doing bottom half * or hardware interrupt processing? 
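As on i386 earlier in the patch, in_interrupt() and in_irq() below gain a leading preempt_is_disabled() test. Interrupt and bottom-half handlers always run with preemption disabled, so a fully preemptible task can answer "not in interrupt" without touching the per-CPU counters, and the counters are only sampled while the task cannot migrate off its CPU. A reduced single-CPU model of the test (plain globals stand in for the per-CPU and per-task counters):

  #include <stdio.h>

  static int preempt_count;       /* > 0 means preemption is disabled */
  static int irq_count;
  static int bh_count;

  #define preempt_is_disabled()   (preempt_count != 0)

  /* Same shape as the patched macro: the counters are looked at only when
   * preemption is off (so the CPU cannot change under us), and a fully
   * preemptible task is by definition not in interrupt context. */
  #define in_interrupt()  (preempt_is_disabled() && (irq_count + bh_count != 0))

  int main(void)
  {
          printf("task context:      in_interrupt() = %d\n", in_interrupt());

          preempt_count++;        /* what interrupt entry does via the preempt counter */
          irq_count++;
          printf("interrupt context: in_interrupt() = %d\n", in_interrupt());

          irq_count--;
          preempt_count--;
          return 0;
  }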
*/ -#define in_interrupt() ({ int __cpu = smp_processor_id(); \ - (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) +#define in_interrupt() (preempt_is_disabled() && \ + ({ unsigned long __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })) -#define in_irq() (local_irq_count(smp_processor_id()) != 0) +#define in_irq() (preempt_is_disabled() && \ + (local_irq_count(smp_processor_id()) != 0)) #ifndef CONFIG_SMP @@ -45,6 +47,7 @@ #define hardirq_exit(cpu) (local_irq_count(cpu)--) #define synchronize_irq() do { } while (0) +#define release_irqlock(cpu) do { } while (0) #else /* CONFIG_SMP */ diff -Nur linux-2.4.33-imedia/include/asm-ppc/highmem.h linux-2.4.33-imedia-patching/include/asm-ppc/highmem.h --- linux-2.4.33-imedia/include/asm-ppc/highmem.h 2003-11-28 20:26:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-ppc/highmem.h 2006-01-26 15:19:42.000000000 +0200 @@ -84,6 +84,7 @@ unsigned int idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -105,8 +106,10 @@ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; unsigned int idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < KMAP_FIX_BEGIN) // FIXME + if (vaddr < KMAP_FIX_BEGIN) { // FIXME + preempt_enable(); return; + } if (vaddr != KMAP_FIX_BEGIN + idx * PAGE_SIZE) BUG(); @@ -118,6 +121,7 @@ pte_clear(kmap_pte+idx); flush_tlb_page(0, vaddr); #endif + preempt_enable(); } #endif /* __KERNEL__ */ diff -Nur linux-2.4.33-imedia/include/asm-ppc/hw_irq.h linux-2.4.33-imedia-patching/include/asm-ppc/hw_irq.h --- linux-2.4.33-imedia/include/asm-ppc/hw_irq.h 2003-06-13 17:51:38.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc/hw_irq.h 2006-01-26 15:19:42.000000000 +0200 @@ -20,6 +20,12 @@ #define __save_and_cli(flags) ({__save_flags(flags);__cli();}) #define __save_and_sti(flags) ({__save_flags(flags);__sti();}) +#define mfmsr() ({unsigned int rval; \ + asm volatile("mfmsr %0" : "=r" (rval)); rval;}) +#define mtmsr(v) asm volatile("mtmsr %0" : : "r" (v)) + +#define irqs_disabled() ((mfmsr() & MSR_EE) == 0) + extern void do_lost_interrupts(unsigned long); #define mask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->disable) irq_desc[irq].handler->disable(irq);}) diff -Nur linux-2.4.33-imedia/include/asm-ppc/mmu_context.h linux-2.4.33-imedia-patching/include/asm-ppc/mmu_context.h --- linux-2.4.33-imedia/include/asm-ppc/mmu_context.h 2003-06-13 17:51:38.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc/mmu_context.h 2006-01-26 15:19:43.000000000 +0200 @@ -155,6 +155,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, int cpu) { +#ifdef CONFIG_PREEMPT + if (preempt_get_count() == 0) + BUG(); +#endif tsk->thread.pgdir = next->pgd; get_mmu_context(next); set_context(next->context, next->pgd); diff -Nur linux-2.4.33-imedia/include/asm-ppc/pgalloc.h linux-2.4.33-imedia-patching/include/asm-ppc/pgalloc.h --- linux-2.4.33-imedia/include/asm-ppc/pgalloc.h 2003-11-28 20:26:21.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-ppc/pgalloc.h 2006-01-26 15:19:43.000000000 +0200 @@ -72,20 +72,26 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else + preempt_enable(); + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } extern __inline__ void free_pgd_fast(pgd_t *pgd) { + 
preempt_disable(); *(unsigned long **)pgd = pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } extern __inline__ void free_pgd_slow(pgd_t *pgd) @@ -124,19 +130,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } extern __inline__ void pte_free_fast(pte_t *pte) { + preempt_disable(); *(unsigned long **)pte = pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } extern __inline__ void pte_free_slow(pte_t *pte) diff -Nur linux-2.4.33-imedia/include/asm-ppc/smplock.h linux-2.4.33-imedia-patching/include/asm-ppc/smplock.h --- linux-2.4.33-imedia/include/asm-ppc/smplock.h 2003-06-13 17:51:38.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc/smplock.h 2006-01-26 15:19:43.000000000 +0200 @@ -12,7 +12,15 @@ extern spinlock_t kernel_flag; +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_get_count() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -44,8 +52,14 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else if (!++current->lock_depth) spin_lock(&kernel_flag); +#endif } static __inline__ void unlock_kernel(void) diff -Nur linux-2.4.33-imedia/include/asm-ppc/softirq.h linux-2.4.33-imedia-patching/include/asm-ppc/softirq.h --- linux-2.4.33-imedia/include/asm-ppc/softirq.h 2003-06-13 17:51:38.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc/softirq.h 2006-01-26 15:19:43.000000000 +0200 @@ -7,6 +7,7 @@ #define local_bh_disable() \ do { \ + preempt_disable(); \ local_bh_count(smp_processor_id())++; \ barrier(); \ } while (0) @@ -15,9 +16,10 @@ do { \ barrier(); \ local_bh_count(smp_processor_id())--; \ + preempt_enable(); \ } while (0) -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ if (!--local_bh_count(smp_processor_id()) \ && softirq_pending(smp_processor_id())) { \ @@ -25,7 +27,14 @@ } \ } while (0) -#define in_softirq() (local_bh_count(smp_processor_id()) != 0) +#define local_bh_enable() \ +do { \ + _local_bh_enable(); \ + preempt_enable(); \ +} while (0) + +#define in_softirq() (preempt_is_disabled() && \ + (local_bh_count(smp_processor_id()) != 0)) #endif /* __ASM_SOFTIRQ_H */ #endif /* __KERNEL__ */ diff -Nur linux-2.4.33-imedia/include/asm-ppc64/bitops.h linux-2.4.33-imedia-patching/include/asm-ppc64/bitops.h --- linux-2.4.33-imedia/include/asm-ppc64/bitops.h 2003-06-13 17:51:38.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-ppc64/bitops.h 2006-01-26 15:19:43.000000000 +0200 @@ -41,12 +41,12 @@ #define smp_mb__before_clear_bit() smp_mb() #define smp_mb__after_clear_bit() smp_mb() -static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr) +static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr) { return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63))); } -static __inline__ void set_bit(unsigned long nr, volatile void *addr) +static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long old; unsigned long mask = 1UL << (nr & 0x3f); @@ -62,7 +62,7 @@ : "cc"); } -static __inline__ void clear_bit(unsigned long nr, volatile void *addr) +static 
__inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long old; unsigned long mask = 1UL << (nr & 0x3f); @@ -78,7 +78,7 @@ : "cc"); } -static __inline__ void change_bit(unsigned long nr, volatile void *addr) +static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long old; unsigned long mask = 1UL << (nr & 0x3f); @@ -94,7 +94,7 @@ : "cc"); } -static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr) +static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long old, t; unsigned long mask = 1UL << (nr & 0x3f); @@ -114,7 +114,7 @@ return (old & mask) != 0; } -static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr) +static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long old, t; unsigned long mask = 1UL << (nr & 0x3f); @@ -134,7 +134,7 @@ return (old & mask) != 0; } -static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr) +static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long old, t; unsigned long mask = 1UL << (nr & 0x3f); @@ -157,7 +157,7 @@ /* * non-atomic versions */ -static __inline__ void __set_bit(unsigned long nr, volatile void *addr) +static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = 1UL << (nr & 0x3f); unsigned long *p = ((unsigned long *)addr) + (nr >> 6); @@ -165,7 +165,7 @@ *p |= mask; } -static __inline__ void __clear_bit(unsigned long nr, volatile void *addr) +static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = 1UL << (nr & 0x3f); unsigned long *p = ((unsigned long *)addr) + (nr >> 6); @@ -173,7 +173,7 @@ *p &= ~mask; } -static __inline__ void __change_bit(unsigned long nr, volatile void *addr) +static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = 1UL << (nr & 0x3f); unsigned long *p = ((unsigned long *)addr) + (nr >> 6); @@ -181,7 +181,7 @@ *p ^= mask; } -static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr) +static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = 1UL << (nr & 0x3f); unsigned long *p = ((unsigned long *)addr) + (nr >> 6); @@ -191,7 +191,7 @@ return (old & mask) != 0; } -static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr) +static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = 1UL << (nr & 0x3f); unsigned long *p = ((unsigned long *)addr) + (nr >> 6); @@ -201,7 +201,7 @@ return (old & mask) != 0; } -static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr) +static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = 1UL << (nr & 0x3f); unsigned long *p = ((unsigned long *)addr) + (nr >> 6); diff -Nur linux-2.4.33-imedia/include/asm-s390/bitops.h linux-2.4.33-imedia-patching/include/asm-s390/bitops.h --- linux-2.4.33-imedia/include/asm-s390/bitops.h 2002-08-03 03:39:45.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-s390/bitops.h 2006-01-26 15:19:43.000000000 +0200 @@ -47,272 +47,217 @@ extern const char _oi_bitmap[]; extern const char _ni_bitmap[]; extern const char _zb_findmap[]; +extern const char _sb_findmap[]; #ifdef CONFIG_SMP /* * SMP save set_bit routine based 
on compare and swap (CS) */ -static __inline__ void set_bit_cs(int nr, volatile void * addr) +static inline void set_bit_cs(int nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */ - " nr %2,%1\n" /* isolate last 2 bits of address */ - " xr %1,%2\n" /* make addr % 4 == 0 */ - " sll %2,3\n" - " ar %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 3; /* align address to 4 */ + nr += (addr & 3) << 3; /* add alignment to bit number */ #endif - " lhi %2,31\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srl %0,3\n" - " lhi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " sll %3,0(%2)\n" /* make OR mask */ - " l %0,0(%1)\n" - "0: lr %2,%0\n" /* CS loop starts here */ - " or %2,%3\n" /* set bit */ - " cs %0,%2,0(%1)\n" - " jl 0b" - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 31); /* make OR mask */ + asm volatile( + " l %0,0(%4)\n" + "0: lr %1,%0\n" + " or %1,%3\n" + " cs %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr) + : "d" (mask), "a" (addr) + : "cc" ); } /* * SMP save clear_bit routine based on compare and swap (CS) */ -static __inline__ void clear_bit_cs(int nr, volatile void * addr) +static inline void clear_bit_cs(int nr, volatile void *ptr) { - static const int minusone = -1; - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */ - " nr %2,%1\n" /* isolate last 2 bits of address */ - " xr %1,%2\n" /* make addr % 4 == 0 */ - " sll %2,3\n" - " ar %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 3; /* align address to 4 */ + nr += (addr & 3) << 3; /* add alignment to bit number */ #endif - " lhi %2,31\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srl %0,3\n" - " lhi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " sll %3,0(%2)\n" - " x %3,%4\n" /* make AND mask */ - " l %0,0(%1)\n" - "0: lr %2,%0\n" /* CS loop starts here */ - " nr %2,%3\n" /* clear bit */ - " cs %0,%2,0(%1)\n" - " jl 0b" - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) - : "m" (minusone) : "cc", "memory" ); + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */ + mask = ~(1UL << (nr & 31)); /* make AND mask */ + asm volatile( + " l %0,0(%4)\n" + "0: lr %1,%0\n" + " nr %1,%3\n" + " cs %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr) + : "d" (mask), "a" (addr) + : "cc" ); } /* * SMP save change_bit routine based on compare and swap (CS) */ -static __inline__ void change_bit_cs(int nr, volatile void * addr) +static inline void change_bit_cs(int nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */ - " nr %2,%1\n" /* isolate last 2 bits of address */ - " xr %1,%2\n" /* make addr % 4 == 0 */ - " sll %2,3\n" - " ar %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 3; /* align address to 4 */ + nr += (addr & 3) << 3; /* add alignment to bit number */ #endif - " lhi %2,31\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srl %0,3\n" - " lhi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. 
address for CS */ - " sll %3,0(%2)\n" /* make XR mask */ - " l %0,0(%1)\n" - "0: lr %2,%0\n" /* CS loop starts here */ - " xr %2,%3\n" /* change bit */ - " cs %0,%2,0(%1)\n" - " jl 0b" - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 31); /* make XOR mask */ + asm volatile( + " l %0,0(%4)\n" + "0: lr %1,%0\n" + " xr %1,%3\n" + " cs %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr) + : "d" (mask), "a" (addr) + : "cc" ); } /* * SMP save test_and_set_bit routine based on compare and swap (CS) */ -static __inline__ int test_and_set_bit_cs(int nr, volatile void * addr) +static inline int test_and_set_bit_cs(int nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */ - " nr %2,%1\n" /* isolate last 2 bits of address */ - " xr %1,%2\n" /* make addr % 4 == 0 */ - " sll %2,3\n" - " ar %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 3; /* align address to 4 */ + nr += (addr & 3) << 3; /* add alignment to bit number */ #endif - " lhi %2,31\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srl %0,3\n" - " lhi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " sll %3,0(%2)\n" /* make OR mask */ - " l %0,0(%1)\n" - "0: lr %2,%0\n" /* CS loop starts here */ - " or %2,%3\n" /* set bit */ - " cs %0,%2,0(%1)\n" - " jl 0b\n" - " nr %0,%3\n" /* isolate old bit */ - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); - return nr != 0; + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 31); /* make OR/test mask */ + asm volatile( + " l %0,0(%4)\n" + "0: lr %1,%0\n" + " or %1,%3\n" + " cs %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr) + : "d" (mask), "a" (addr) + : "cc" ); + return (old & mask) != 0; } /* * SMP save test_and_clear_bit routine based on compare and swap (CS) */ -static __inline__ int test_and_clear_bit_cs(int nr, volatile void * addr) +static inline int test_and_clear_bit_cs(int nr, volatile void *ptr) { - static const int minusone = -1; - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */ - " nr %2,%1\n" /* isolate last 2 bits of address */ - " xr %1,%2\n" /* make addr % 4 == 0 */ - " sll %2,3\n" - " ar %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 3; /* align address to 4 */ + nr += (addr & 3) << 3; /* add alignment to bit number */ #endif - " lhi %2,31\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srl %0,3\n" - " lhi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. 
address for CS */ - " sll %3,0(%2)\n" - " l %0,0(%1)\n" - " x %3,%4\n" /* make AND mask */ - "0: lr %2,%0\n" /* CS loop starts here */ - " nr %2,%3\n" /* clear bit */ - " cs %0,%2,0(%1)\n" - " jl 0b\n" - " x %3,%4\n" - " nr %0,%3\n" /* isolate old bit */ - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) - : "m" (minusone) : "cc", "memory" ); - return nr; + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */ + mask = ~(1UL << (nr & 31)); /* make AND mask */ + asm volatile( + " l %0,0(%4)\n" + "0: lr %1,%0\n" + " nr %1,%3\n" + " cs %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr) + : "d" (mask), "a" (addr) + : "cc" ); + return (old ^ new) != 0; } /* * SMP save test_and_change_bit routine based on compare and swap (CS) */ -static __inline__ int test_and_change_bit_cs(int nr, volatile void * addr) +static inline int test_and_change_bit_cs(int nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */ - " nr %2,%1\n" /* isolate last 2 bits of address */ - " xr %1,%2\n" /* make addr % 4 == 0 */ - " sll %2,3\n" - " ar %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 3; /* align address to 4 */ + nr += (addr & 3) << 3; /* add alignment to bit number */ #endif - " lhi %2,31\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srl %0,3\n" - " lhi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " sll %3,0(%2)\n" /* make OR mask */ - " l %0,0(%1)\n" - "0: lr %2,%0\n" /* CS loop starts here */ - " xr %2,%3\n" /* change bit */ - " cs %0,%2,0(%1)\n" - " jl 0b\n" - " nr %0,%3\n" /* isolate old bit */ - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); - return nr != 0; + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 31); /* make XOR mask */ + asm volatile( + " l %0,0(%4)\n" + "0: lr %1,%0\n" + " xr %1,%3\n" + " cs %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr) + : "d" (mask), "a" (addr) + : "cc" ); + return (old & mask) != 0; } #endif /* CONFIG_SMP */ /* * fast, non-SMP set_bit routine */ -static __inline__ void __set_bit(int nr, volatile void * addr) +static inline void __set_bit(int nr, volatile void *ptr) { - unsigned long reg1, reg2; - __asm__ __volatile__( - " lhi %1,24\n" - " lhi %0,7\n" - " xr %1,%2\n" - " nr %0,%2\n" - " srl %1,3\n" - " la %1,0(%1,%3)\n" - " la %0,0(%0,%4)\n" - " oc 0(1,%1),0(%0)" - : "=&a" (reg1), "=&a" (reg2) - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); -} - -static __inline__ void -__constant_set_bit(const int nr, volatile void * addr) -{ - switch (nr&7) { - case 0: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x01" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory"); - break; - case 1: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x02" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 2: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x04" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 3: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x08" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 4: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x10" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 5: - 
__asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x20" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 6: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x40" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 7: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x80" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - } + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ 24) >> 3); + asm volatile("oc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); +} + +static inline void +__constant_set_bit(const int nr, volatile void *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3); + switch (nr&7) { + case 0: + asm volatile ("oi 0(%1),0x01" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 1: + asm volatile ("oi 0(%1),0x02" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 2: + asm volatile ("oi 0(%1),0x04" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 3: + asm volatile ("oi 0(%1),0x08" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 4: + asm volatile ("oi 0(%1),0x10" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 5: + asm volatile ("oi 0(%1),0x20" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 6: + asm volatile ("oi 0(%1),0x40" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 7: + asm volatile ("oi 0(%1),0x80" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + } } #define set_bit_simple(nr,addr) \ @@ -323,76 +268,58 @@ /* * fast, non-SMP clear_bit routine */ -static __inline__ void -__clear_bit(int nr, volatile void * addr) +static inline void +__clear_bit(int nr, volatile void *ptr) { - unsigned long reg1, reg2; - __asm__ __volatile__( - " lhi %1,24\n" - " lhi %0,7\n" - " xr %1,%2\n" - " nr %0,%2\n" - " srl %1,3\n" - " la %1,0(%1,%3)\n" - " la %0,0(%0,%4)\n" - " nc 0(1,%1),0(%0)" - : "=&a" (reg1), "=&a" (reg2) - : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" ); -} - -static __inline__ void -__constant_clear_bit(const int nr, volatile void * addr) -{ - switch (nr&7) { - case 0: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xFE" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 1: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xFD" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 2: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xFB" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 3: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xF7" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 4: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xEF" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "cc", "memory" ); - break; - case 5: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xDF" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 6: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xBF" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 7: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0x7F" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - } + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ 24) >> 3); 
+ asm volatile("nc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_ni_bitmap + (nr & 7)) + : "cc" ); +} + +static inline void +__constant_clear_bit(const int nr, volatile void *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3); + switch (nr&7) { + case 0: + asm volatile ("ni 0(%1),0xFE" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 1: + asm volatile ("ni 0(%1),0xFD" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 2: + asm volatile ("ni 0(%1),0xFB" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 3: + asm volatile ("ni 0(%1),0xF7" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 4: + asm volatile ("ni 0(%1),0xEF" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 5: + asm volatile ("ni 0(%1),0xDF" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 6: + asm volatile ("ni 0(%1),0xBF" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 7: + asm volatile ("ni 0(%1),0x7F" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + } } #define clear_bit_simple(nr,addr) \ @@ -403,75 +330,57 @@ /* * fast, non-SMP change_bit routine */ -static __inline__ void __change_bit(int nr, volatile void * addr) +static inline void __change_bit(int nr, volatile void *ptr) { - unsigned long reg1, reg2; - __asm__ __volatile__( - " lhi %1,24\n" - " lhi %0,7\n" - " xr %1,%2\n" - " nr %0,%2\n" - " srl %1,3\n" - " la %1,0(%1,%3)\n" - " la %0,0(%0,%4)\n" - " xc 0(1,%1),0(%0)" - : "=&a" (reg1), "=&a" (reg2) - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); -} - -static __inline__ void -__constant_change_bit(const int nr, volatile void * addr) -{ - switch (nr&7) { - case 0: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x01" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "cc", "memory" ); - break; - case 1: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x02" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "cc", "memory" ); - break; - case 2: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x04" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "cc", "memory" ); - break; - case 3: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x08" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "cc", "memory" ); - break; - case 4: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x10" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "cc", "memory" ); - break; - case 5: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x20" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 6: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x40" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - case 7: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x80" - : "=m" (*((volatile char *) addr + ((nr>>3)^3))) - : : "1", "cc", "memory" ); - break; - } + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ 24) >> 3); + asm volatile("xc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); +} + +static inline void +__constant_change_bit(const int nr, volatile void *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3); + switch (nr&7) { + case 0: + asm volatile ("xi 0(%1),0x01" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 1: + asm volatile ("xi 0(%1),0x02" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 2: + asm volatile ("xi 0(%1),0x04" + : 
"+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 3: + asm volatile ("xi 0(%1),0x08" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 4: + asm volatile ("xi 0(%1),0x10" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 5: + asm volatile ("xi 0(%1),0x20" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 6: + asm volatile ("xi 0(%1),0x40" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 7: + asm volatile ("xi 0(%1),0x80" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + } } #define change_bit_simple(nr,addr) \ @@ -482,74 +391,54 @@ /* * fast, non-SMP test_and_set_bit routine */ -static __inline__ int test_and_set_bit_simple(int nr, volatile void * addr) +static inline int test_and_set_bit_simple(int nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; - __asm__ __volatile__( - " lhi %1,24\n" - " lhi %2,7\n" - " xr %1,%3\n" - " nr %2,%3\n" - " srl %1,3\n" - " la %1,0(%1,%4)\n" - " ic %0,0(%1)\n" - " srl %0,0(%2)\n" - " la %2,0(%2,%5)\n" - " oc 0(1,%1),0(%2)" - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); - return oldbit & 1; + unsigned long addr; + unsigned char ch; + + addr = (unsigned long) ptr + ((nr ^ 24) >> 3); + ch = *(unsigned char *) addr; + asm volatile("oc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); + return (ch >> (nr & 7)) & 1; } #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y) /* * fast, non-SMP test_and_clear_bit routine */ -static __inline__ int test_and_clear_bit_simple(int nr, volatile void * addr) +static inline int test_and_clear_bit_simple(int nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; + unsigned long addr; + unsigned char ch; - __asm__ __volatile__( - " lhi %1,24\n" - " lhi %2,7\n" - " xr %1,%3\n" - " nr %2,%3\n" - " srl %1,3\n" - " la %1,0(%1,%4)\n" - " ic %0,0(%1)\n" - " srl %0,0(%2)\n" - " la %2,0(%2,%5)\n" - " nc 0(1,%1),0(%2)" - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" ); - return oldbit & 1; + addr = (unsigned long) ptr + ((nr ^ 24) >> 3); + ch = *(unsigned char *) addr; + asm volatile("nc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_ni_bitmap + (nr & 7)) + : "cc" ); + return (ch >> (nr & 7)) & 1; } #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y) /* * fast, non-SMP test_and_change_bit routine */ -static __inline__ int test_and_change_bit_simple(int nr, volatile void * addr) +static inline int test_and_change_bit_simple(int nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; + unsigned long addr; + unsigned char ch; - __asm__ __volatile__( - " lhi %1,24\n" - " lhi %2,7\n" - " xr %1,%3\n" - " nr %2,%1\n" - " srl %1,3\n" - " la %1,0(%1,%4)\n" - " ic %0,0(%1)\n" - " srl %0,0(%2)\n" - " la %2,0(%2,%5)\n" - " xc 0(1,%1),0(%2)" - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); - return oldbit & 1; + addr = (unsigned long) ptr + ((nr ^ 24) >> 3); + ch = *(unsigned char *) addr; + asm volatile("xc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); + return (ch >> (nr & 7)) & 1; } #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y) @@ -574,25 +463,17 @@ * This routine doesn't need to be atomic. 
*/ -static __inline__ int __test_bit(int nr, volatile void * addr) +static inline int __test_bit(int nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; + unsigned long addr; + unsigned char ch; - __asm__ __volatile__( - " lhi %2,24\n" - " lhi %1,7\n" - " xr %2,%3\n" - " nr %1,%3\n" - " srl %2,3\n" - " ic %0,0(%2,%4)\n" - " srl %0,0(%1)" - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "r" (nr), "a" (addr) : "cc" ); - return oldbit & 1; + addr = (unsigned long) ptr + ((nr ^ 24) >> 3); + ch = *(unsigned char *) addr; + return (ch >> (nr & 7)) & 1; } -static __inline__ int __constant_test_bit(int nr, volatile void * addr) { +static inline int __constant_test_bit(int nr, volatile void * addr) { return (((volatile char *) addr)[(nr>>3)^3] & (1<<(nr&7))) != 0; } @@ -604,7 +485,7 @@ /* * Find-bit routines.. */ -static __inline__ int find_first_zero_bit(void * addr, unsigned size) +static inline int find_first_zero_bit(void * addr, unsigned size) { unsigned long cmp, count; int res; @@ -642,7 +523,45 @@ return (res < size) ? res : size; } -static __inline__ int find_next_zero_bit (void * addr, int size, int offset) +static inline int find_first_bit(void * addr, unsigned size) +{ + unsigned long cmp, count; + int res; + + if (!size) + return 0; + __asm__(" slr %1,%1\n" + " lr %2,%3\n" + " slr %0,%0\n" + " ahi %2,31\n" + " srl %2,5\n" + "0: c %1,0(%0,%4)\n" + " jne 1f\n" + " ahi %0,4\n" + " brct %2,0b\n" + " lr %0,%3\n" + " j 4f\n" + "1: l %2,0(%0,%4)\n" + " sll %0,3\n" + " lhi %1,0xff\n" + " tml %2,0xffff\n" + " jnz 2f\n" + " ahi %0,16\n" + " srl %2,16\n" + "2: tml %2,0x00ff\n" + " jnz 3f\n" + " ahi %0,8\n" + " srl %2,8\n" + "3: nr %2,%1\n" + " ic %2,0(%2,%5)\n" + " alr %0,%2\n" + "4:" + : "=&a" (res), "=&d" (cmp), "=&a" (count) + : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" ); + return (res < size) ? res : size; +} + +static inline int find_next_zero_bit (void * addr, int size, int offset) { unsigned long * p = ((unsigned long *) addr) + (offset >> 5); unsigned long bitvec, reg; @@ -680,11 +599,49 @@ return (offset + res); } +static inline int find_next_bit (void * addr, int size, int offset) +{ + unsigned long * p = ((unsigned long *) addr) + (offset >> 5); + unsigned long bitvec, reg; + int set, bit = offset & 31, res; + + if (bit) { + /* + * Look for set bit in first word + */ + bitvec = (*p) >> bit; + __asm__(" slr %0,%0\n" + " lhi %2,0xff\n" + " tml %1,0xffff\n" + " jnz 0f\n" + " ahi %0,16\n" + " srl %1,16\n" + "0: tml %1,0x00ff\n" + " jnz 1f\n" + " ahi %0,8\n" + " srl %1,8\n" + "1: nr %1,%2\n" + " ic %1,0(%1,%3)\n" + " alr %0,%1" + : "=&d" (set), "+a" (bitvec), "=&d" (reg) + : "a" (&_sb_findmap) : "cc" ); + if (set < (32 - bit)) + return set + offset; + offset += 32 - bit; + p++; + } + /* + * No set bit yet, search remaining full words for a bit + */ + res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr)); + return (offset + res); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. */ -static __inline__ unsigned long ffz(unsigned long word) +static inline unsigned long ffz(unsigned long word) { unsigned long reg; int result; @@ -708,40 +665,109 @@ } /* + * __ffs = find first bit in word. Undefined if no bit exists, + * so code should check against 0UL first.. 
+ */ +static inline unsigned long __ffs(unsigned long word) +{ + unsigned long reg, result; + + __asm__(" slr %0,%0\n" + " lhi %2,0xff\n" + " tml %1,0xffff\n" + " jnz 0f\n" + " ahi %0,16\n" + " srl %1,16\n" + "0: tml %1,0x00ff\n" + " jnz 1f\n" + " ahi %0,8\n" + " srl %1,8\n" + "1: nr %1,%2\n" + " ic %1,0(%1,%3)\n" + " alr %0,%1" + : "=&d" (result), "+a" (word), "=&d" (reg) + : "a" (&_sb_findmap) : "cc" ); + return result; +} + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is cleared. + */ +static inline int sched_find_first_bit(unsigned long *b) +{ + return find_first_bit(b, 140); +} + +/* * ffs: find first bit set. This is defined the same way as * the libc and compiler builtin ffs routines, therefore * differs in spirit from the above ffz (man ffs). */ -extern int __inline__ ffs (int x) +extern int inline ffs (int x) { - int r; + int r = 1; if (x == 0) - return 0; - __asm__(" slr %0,%0\n" - " tml %1,0xffff\n" + return 0; + __asm__(" tml %1,0xffff\n" " jnz 0f\n" - " ahi %0,16\n" " srl %1,16\n" + " ahi %0,16\n" "0: tml %1,0x00ff\n" " jnz 1f\n" - " ahi %0,8\n" " srl %1,8\n" + " ahi %0,8\n" "1: tml %1,0x000f\n" " jnz 2f\n" - " ahi %0,4\n" " srl %1,4\n" + " ahi %0,4\n" "2: tml %1,0x0003\n" " jnz 3f\n" - " ahi %0,2\n" " srl %1,2\n" + " ahi %0,2\n" "3: tml %1,0x0001\n" " jnz 4f\n" " ahi %0,1\n" "4:" : "=&d" (r), "+d" (x) : : "cc" ); - return r+1; + return r; +} + +/* + * fls: find last bit set. + */ +extern __inline__ int fls(int x) +{ + int r = 32; + + if (x == 0) + return 0; + __asm__(" tmh %1,0xffff\n" + " jz 0f\n" + " sll %1,16\n" + " ahi %0,-16\n" + "0: tmh %1,0xff00\n" + " jz 1f\n" + " sll %1,8\n" + " ahi %0,-8\n" + "1: tmh %1,0xf000\n" + " jz 2f\n" + " sll %1,4\n" + " ahi %0,-4\n" + "2: tmh %1,0xc000\n" + " jz 3f\n" + " sll %1,2\n" + " ahi %0,-2\n" + "3: tmh %1,0x8000\n" + " jz 4f\n" + " ahi %0,-1\n" + "4:" + : "+d" (r), "+d" (x) : : "cc" ); + return r; } /* @@ -769,7 +795,7 @@ #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^24, addr) #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^24, addr) #define ext2_test_bit(nr, addr) test_bit((nr)^24, addr) -static __inline__ int ext2_find_first_zero_bit(void *vaddr, unsigned size) +static inline int ext2_find_first_zero_bit(void *vaddr, unsigned size) { unsigned long cmp, count; int res; @@ -808,7 +834,7 @@ return (res < size) ? res : size; } -static __inline__ int +static inline int ext2_find_next_zero_bit(void *vaddr, unsigned size, unsigned offset) { unsigned long *addr = vaddr; diff -Nur linux-2.4.33-imedia/include/asm-s390x/bitops.h linux-2.4.33-imedia-patching/include/asm-s390x/bitops.h --- linux-2.4.33-imedia/include/asm-s390x/bitops.h 2002-08-03 03:39:45.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/asm-s390x/bitops.h 2006-01-26 15:19:43.000000000 +0200 @@ -51,271 +51,220 @@ extern const char _oi_bitmap[]; extern const char _ni_bitmap[]; extern const char _zb_findmap[]; +extern const char _sb_findmap[]; #ifdef CONFIG_SMP /* * SMP save set_bit routine based on compare and swap (CS) */ -static __inline__ void set_bit_cs(unsigned long nr, volatile void * addr) +static inline void set_bit_cs(unsigned long nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. 
*/ - " ngr %2,%1\n" /* isolate last 2 bits of address */ - " xgr %1,%2\n" /* make addr % 4 == 0 */ - " sllg %2,%2,3\n" - " agr %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 7; /* align address to 8 */ + nr += (addr & 7) << 3; /* add alignment to bit number */ #endif - " lghi %2,63\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srlg %0,%0,3\n" - " lghi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " sllg %3,%3,0(%2)\n" /* make OR mask */ - " lg %0,0(%1)\n" - "0: lgr %2,%0\n" /* CS loop starts here */ - " ogr %2,%3\n" /* set bit */ - " csg %0,%2,0(%1)\n" - " jl 0b" - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 63); /* make OR mask */ + asm volatile( + " lg %0,0(%4)\n" + "0: lgr %1,%0\n" + " ogr %1,%3\n" + " csg %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr) + : "d" (mask), "a" (addr) + : "cc" ); } /* * SMP save clear_bit routine based on compare and swap (CS) */ -static __inline__ void clear_bit_cs(unsigned long nr, volatile void * addr) +static inline void clear_bit_cs(unsigned long nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */ - " ngr %2,%1\n" /* isolate last 2 bits of address */ - " xgr %1,%2\n" /* make addr % 4 == 0 */ - " sllg %2,%2,3\n" - " agr %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 7; /* align address to 8 */ + nr += (addr & 7) << 3; /* add alignment to bit number */ #endif - " lghi %2,63\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srlg %0,%0,3\n" - " lghi %3,-2\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " lghi %3,-2\n" - " rllg %3,%3,0(%2)\n" /* make AND mask */ - " lg %0,0(%1)\n" - "0: lgr %2,%0\n" /* CS loop starts here */ - " ngr %2,%3\n" /* clear bit */ - " csg %0,%2,0(%1)\n" - " jl 0b" - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */ + mask = ~(1UL << (nr & 63)); /* make AND mask */ + asm volatile( + " lg %0,0(%4)\n" + "0: lgr %1,%0\n" + " ngr %1,%3\n" + " csg %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr) + : "d" (mask), "a" (addr) + : "cc" ); } /* * SMP save change_bit routine based on compare and swap (CS) */ -static __inline__ void change_bit_cs(unsigned long nr, volatile void * addr) +static inline void change_bit_cs(unsigned long nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */ - " ngr %2,%1\n" /* isolate last 2 bits of address */ - " xgr %1,%2\n" /* make addr % 4 == 0 */ - " sllg %2,%2,3\n" - " agr %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 7; /* align address to 8 */ + nr += (addr & 7) << 3; /* add alignment to bit number */ #endif - " lghi %2,63\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srlg %0,%0,3\n" - " lghi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. 
address for CS */ - " sllg %3,%3,0(%2)\n" /* make XR mask */ - " lg %0,0(%1)\n" - "0: lgr %2,%0\n" /* CS loop starts here */ - " xgr %2,%3\n" /* change bit */ - " csg %0,%2,0(%1)\n" - " jl 0b" - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 63); /* make XOR mask */ + asm volatile( + " lg %0,0(%4)\n" + "0: lgr %1,%0\n" + " xgr %1,%3\n" + " csg %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr) + : "d" (mask), "a" (addr) + : "cc" ); } /* * SMP save test_and_set_bit routine based on compare and swap (CS) */ -static __inline__ int -test_and_set_bit_cs(unsigned long nr, volatile void * addr) +static inline int +test_and_set_bit_cs(unsigned long nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */ - " ngr %2,%1\n" /* isolate last 2 bits of address */ - " xgr %1,%2\n" /* make addr % 4 == 0 */ - " sllg %2,%2,3\n" - " agr %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 7; /* align address to 8 */ + nr += (addr & 7) << 3; /* add alignment to bit number */ #endif - " lghi %2,63\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srlg %0,%0,3\n" - " lghi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " sllg %3,%3,0(%2)\n" /* make OR mask */ - " lg %0,0(%1)\n" - "0: lgr %2,%0\n" /* CS loop starts here */ - " ogr %2,%3\n" /* set bit */ - " csg %0,%2,0(%1)\n" - " jl 0b\n" - " ngr %0,%3\n" /* isolate old bit */ - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); - return nr != 0; + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 63); /* make OR/test mask */ + asm volatile( + " lg %0,0(%4)\n" + "0: lgr %1,%0\n" + " ogr %1,%3\n" + " csg %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr) + : "d" (mask), "a" (addr) + : "cc" ); + return (old & mask) != 0; } /* * SMP save test_and_clear_bit routine based on compare and swap (CS) */ -static __inline__ int -test_and_clear_bit_cs(unsigned long nr, volatile void * addr) +static inline int +test_and_clear_bit_cs(unsigned long nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */ - " ngr %2,%1\n" /* isolate last 2 bits of address */ - " xgr %1,%2\n" /* make addr % 4 == 0 */ - " sllg %2,%2,3\n" - " agr %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 7; /* align address to 8 */ + nr += (addr & 7) << 3; /* add alignment to bit number */ #endif - " lghi %2,63\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srlg %0,%0,3\n" - " lghi %3,-2\n" - " la %1,0(%0,%1)\n" /* calc. 
address for CS */ - " rllg %3,%3,0(%2)\n" /* make AND mask */ - " lg %0,0(%1)\n" - "0: lgr %2,%0\n" /* CS loop starts here */ - " ngr %2,%3\n" /* clear bit */ - " csg %0,%2,0(%1)\n" - " jl 0b\n" - " xgr %0,%2\n" /* isolate old bit */ - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); - return nr != 0; + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */ + mask = ~(1UL << (nr & 63)); /* make AND mask */ + asm volatile( + " lg %0,0(%4)\n" + "0: lgr %1,%0\n" + " ngr %1,%3\n" + " csg %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr) + : "d" (mask), "a" (addr) + : "cc" ); + return (old ^ new) != 0; } /* * SMP save test_and_change_bit routine based on compare and swap (CS) */ -static __inline__ int -test_and_change_bit_cs(unsigned long nr, volatile void * addr) +static inline int +test_and_change_bit_cs(unsigned long nr, volatile void *ptr) { - unsigned long bits, mask; - __asm__ __volatile__( + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; #if ALIGN_CS == 1 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */ - " ngr %2,%1\n" /* isolate last 2 bits of address */ - " xgr %1,%2\n" /* make addr % 4 == 0 */ - " sllg %2,%2,3\n" - " agr %0,%2\n" /* add alignement to bitnr */ + addr ^= addr & 7; /* align address to 8 */ + nr += (addr & 7) << 3; /* add alignment to bit number */ #endif - " lghi %2,63\n" - " nr %2,%0\n" /* make shift value */ - " xr %0,%2\n" - " srlg %0,%0,3\n" - " lghi %3,1\n" - " la %1,0(%0,%1)\n" /* calc. address for CS */ - " sllg %3,%3,0(%2)\n" /* make OR mask */ - " lg %0,0(%1)\n" - "0: lgr %2,%0\n" /* CS loop starts here */ - " xgr %2,%3\n" /* change bit */ - " csg %0,%2,0(%1)\n" - " jl 0b\n" - " ngr %0,%3\n" /* isolate old bit */ - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) : - : "cc", "memory" ); - return nr != 0; + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */ + mask = 1UL << (nr & 63); /* make XOR mask */ + asm volatile( + " lg %0,0(%4)\n" + "0: lgr %1,%0\n" + " xgr %1,%3\n" + " csg %0,%1,0(%4)\n" + " jl 0b" + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr) + : "d" (mask), "a" (addr) + : "cc" ); + return (old & mask) != 0; } #endif /* CONFIG_SMP */ /* * fast, non-SMP set_bit routine */ -static __inline__ void __set_bit(unsigned long nr, volatile void * addr) +static inline void __set_bit(unsigned long nr, volatile void *ptr) { - unsigned long reg1, reg2; - __asm__ __volatile__( - " lghi %1,56\n" - " lghi %0,7\n" - " xgr %1,%2\n" - " nr %0,%2\n" - " srlg %1,%1,3\n" - " la %1,0(%1,%3)\n" - " la %0,0(%0,%4)\n" - " oc 0(1,%1),0(%0)" - : "=&a" (reg1), "=&a" (reg2) - : "a" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); -} - -static __inline__ void -__constant_set_bit(const unsigned long nr, volatile void * addr) -{ - switch (nr&7) { - case 0: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x01" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory"); - break; - case 1: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x02" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 2: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x04" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 3: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x08" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 4: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x10" - : "=m" (*((volatile char *) addr + 
((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 5: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x20" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 6: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x40" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 7: - __asm__ __volatile__ ("la 1,%0\n\t" - "oi 0(1),0x80" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - } + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ 56) >> 3); + asm volatile("oc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); +} + +static inline void +__constant_set_bit(const unsigned long nr, volatile void *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7); + switch (nr&7) { + case 0: + asm volatile ("oi 0(%1),0x01" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 1: + asm volatile ("oi 0(%1),0x02" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 2: + asm volatile ("oi 0(%1),0x04" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 3: + asm volatile ("oi 0(%1),0x08" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 4: + asm volatile ("oi 0(%1),0x10" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 5: + asm volatile ("oi 0(%1),0x20" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 6: + asm volatile ("oi 0(%1),0x40" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 7: + asm volatile ("oi 0(%1),0x80" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + } } #define set_bit_simple(nr,addr) \ @@ -326,76 +275,58 @@ /* * fast, non-SMP clear_bit routine */ -static __inline__ void -__clear_bit(unsigned long nr, volatile void * addr) +static inline void +__clear_bit(unsigned long nr, volatile void *ptr) { - unsigned long reg1, reg2; - __asm__ __volatile__( - " lghi %1,56\n" - " lghi %0,7\n" - " xgr %1,%2\n" - " nr %0,%2\n" - " srlg %1,%1,3\n" - " la %1,0(%1,%3)\n" - " la %0,0(%0,%4)\n" - " nc 0(1,%1),0(%0)" - : "=&a" (reg1), "=&a" (reg2) - : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" ); -} - -static __inline__ void -__constant_clear_bit(const unsigned long nr, volatile void * addr) -{ - switch (nr&7) { - case 0: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xFE" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 1: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xFD" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 2: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xFB" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 3: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xF7" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 4: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xEF" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "cc", "memory" ); - break; - case 5: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xDF" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 6: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0xBF" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 7: - __asm__ __volatile__ ("la 1,%0\n\t" - "ni 0(1),0x7F" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : 
: "1", "cc", "memory" ); - break; - } + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ 56) >> 3); + asm volatile("nc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_ni_bitmap + (nr & 7)) + : "cc" ); +} + +static inline void +__constant_clear_bit(const unsigned long nr, volatile void *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7); + switch (nr&7) { + case 0: + asm volatile ("ni 0(%1),0xFE" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 1: + asm volatile ("ni 0(%1),0xFD" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 2: + asm volatile ("ni 0(%1),0xFB" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 3: + asm volatile ("ni 0(%1),0xF7" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 4: + asm volatile ("ni 0(%1),0xEF" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 5: + asm volatile ("ni 0(%1),0xDF" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 6: + asm volatile ("ni 0(%1),0xBF" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 7: + asm volatile ("ni 0(%1),0x7F" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + } } #define clear_bit_simple(nr,addr) \ @@ -406,75 +337,57 @@ /* * fast, non-SMP change_bit routine */ -static __inline__ void __change_bit(unsigned long nr, volatile void * addr) +static inline void __change_bit(unsigned long nr, volatile void *ptr) { - unsigned long reg1, reg2; - __asm__ __volatile__( - " lghi %1,56\n" - " lghi %0,7\n" - " xgr %1,%2\n" - " nr %0,%2\n" - " srlg %1,%1,3\n" - " la %1,0(%1,%3)\n" - " la %0,0(%0,%4)\n" - " xc 0(1,%1),0(%0)" - : "=&a" (reg1), "=&a" (reg2) - : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); -} - -static __inline__ void -__constant_change_bit(const unsigned long nr, volatile void * addr) -{ - switch (nr&7) { - case 0: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x01" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "cc", "memory" ); - break; - case 1: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x02" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "cc", "memory" ); - break; - case 2: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x04" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "cc", "memory" ); - break; - case 3: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x08" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "cc", "memory" ); - break; - case 4: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x10" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "cc", "memory" ); - break; - case 5: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x20" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 6: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x40" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - case 7: - __asm__ __volatile__ ("la 1,%0\n\t" - "xi 0(1),0x80" - : "=m" (*((volatile char *) addr + ((nr>>3)^7))) - : : "1", "cc", "memory" ); - break; - } + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ 56) >> 3); + asm volatile("xc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); +} + +static inline void +__constant_change_bit(const unsigned long nr, volatile void *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7); + switch (nr&7) { + case 0: + asm volatile ("xi 0(%1),0x01" + : "+m" (*(char *) addr) : "a" 
(addr) : "cc" ); + break; + case 1: + asm volatile ("xi 0(%1),0x02" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 2: + asm volatile ("xi 0(%1),0x04" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 3: + asm volatile ("xi 0(%1),0x08" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 4: + asm volatile ("xi 0(%1),0x10" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 5: + asm volatile ("xi 0(%1),0x20" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 6: + asm volatile ("xi 0(%1),0x40" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + case 7: + asm volatile ("xi 0(%1),0x80" + : "+m" (*(char *) addr) : "a" (addr) : "cc" ); + break; + } } #define change_bit_simple(nr,addr) \ @@ -485,77 +398,57 @@ /* * fast, non-SMP test_and_set_bit routine */ -static __inline__ int -test_and_set_bit_simple(unsigned long nr, volatile void * addr) +static inline int +test_and_set_bit_simple(unsigned long nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; - __asm__ __volatile__( - " lghi %1,56\n" - " lghi %2,7\n" - " xgr %1,%3\n" - " nr %2,%3\n" - " srlg %1,%1,3\n" - " la %1,0(%1,%4)\n" - " ic %0,0(%1)\n" - " srl %0,0(%2)\n" - " la %2,0(%2,%5)\n" - " oc 0(1,%1),0(%2)" - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); - return oldbit & 1; + unsigned long addr; + unsigned char ch; + + addr = (unsigned long) ptr + ((nr ^ 56) >> 3); + ch = *(unsigned char *) addr; + asm volatile("oc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); + return (ch >> (nr & 7)) & 1; } #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y) /* * fast, non-SMP test_and_clear_bit routine */ -static __inline__ int -test_and_clear_bit_simple(unsigned long nr, volatile void * addr) +static inline int +test_and_clear_bit_simple(unsigned long nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; + unsigned long addr; + unsigned char ch; - __asm__ __volatile__( - " lghi %1,56\n" - " lghi %2,7\n" - " xgr %1,%3\n" - " nr %2,%3\n" - " srlg %1,%1,3\n" - " la %1,0(%1,%4)\n" - " ic %0,0(%1)\n" - " srl %0,0(%2)\n" - " la %2,0(%2,%5)\n" - " nc 0(1,%1),0(%2)" - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" ); - return oldbit & 1; + addr = (unsigned long) ptr + ((nr ^ 56) >> 3); + ch = *(unsigned char *) addr; + asm volatile("nc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_ni_bitmap + (nr & 7)) + : "cc" ); + return (ch >> (nr & 7)) & 1; } #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y) /* * fast, non-SMP test_and_change_bit routine */ -static __inline__ int -test_and_change_bit_simple(unsigned long nr, volatile void * addr) +static inline int +test_and_change_bit_simple(unsigned long nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; + unsigned long addr; + unsigned char ch; - __asm__ __volatile__( - " lghi %1,56\n" - " lghi %2,7\n" - " xgr %1,%3\n" - " nr %2,%3\n" - " srlg %1,%1,3\n" - " la %1,0(%1,%4)\n" - " ic %0,0(%1)\n" - " srl %0,0(%2)\n" - " la %2,0(%2,%5)\n" - " xc 0(1,%1),0(%2)" - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" ); - return oldbit & 1; + addr = (unsigned long) ptr + ((nr ^ 56) >> 3); + ch = *(unsigned char *) addr; + asm volatile("xc 0(1,%1),0(%2)" + : "+m" (*(char *) addr) + : "a" (addr), "a" (_oi_bitmap + (nr & 7)) + : "cc" ); + 
return (ch >> (nr & 7)) & 1; } #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y) @@ -580,26 +473,18 @@ * This routine doesn't need to be atomic. */ -static __inline__ int __test_bit(unsigned long nr, volatile void * addr) +static inline int __test_bit(unsigned long nr, volatile void *ptr) { - unsigned long reg1, reg2; - int oldbit; + unsigned long addr; + unsigned char ch; - __asm__ __volatile__( - " lghi %2,56\n" - " lghi %1,7\n" - " xgr %2,%3\n" - " nr %1,%3\n" - " srlg %2,%2,3\n" - " ic %0,0(%2,%4)\n" - " srl %0,0(%1)\n" - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2) - : "d" (nr), "a" (addr) : "cc" ); - return oldbit & 1; + addr = (unsigned long) ptr + ((nr ^ 56) >> 3); + ch = *(unsigned char *) addr; + return (ch >> (nr & 7)) & 1; } -static __inline__ int -__constant_test_bit(unsigned long nr, volatile void * addr) { +static inline int +__constant_test_bit(unsigned long nr, volatile void *addr) { return (((volatile char *) addr)[(nr>>3)^7] & (1<<(nr&7))) != 0; } @@ -611,7 +496,7 @@ /* * Find-bit routines.. */ -static __inline__ unsigned long +static inline unsigned long find_first_zero_bit(void * addr, unsigned long size) { unsigned long res, cmp, count; @@ -653,7 +538,49 @@ return (res < size) ? res : size; } -static __inline__ unsigned long +static inline unsigned long +find_first_bit(void * addr, unsigned long size) +{ + unsigned long res, cmp, count; + + if (!size) + return 0; + __asm__(" slgr %1,%1\n" + " lgr %2,%3\n" + " slgr %0,%0\n" + " aghi %2,63\n" + " srlg %2,%2,6\n" + "0: cg %1,0(%0,%4)\n" + " jne 1f\n" + " aghi %0,8\n" + " brct %2,0b\n" + " lgr %0,%3\n" + " j 5f\n" + "1: lg %2,0(%0,%4)\n" + " sllg %0,%0,3\n" + " clr %2,%1\n" + " jne 2f\n" + " aghi %0,32\n" + " srlg %2,%2,32\n" + "2: lghi %1,0xff\n" + " tmll %2,0xffff\n" + " jnz 3f\n" + " aghi %0,16\n" + " srl %2,16\n" + "3: tmll %2,0x00ff\n" + " jnz 4f\n" + " aghi %0,8\n" + " srl %2,8\n" + "4: ngr %2,%1\n" + " ic %2,0(%2,%5)\n" + " algr %0,%2\n" + "5:" + : "=&a" (res), "=&d" (cmp), "=&a" (count) + : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" ); + return (res < size) ? res : size; +} + +static inline unsigned long find_next_zero_bit (void * addr, unsigned long size, unsigned long offset) { unsigned long * p = ((unsigned long *) addr) + (offset >> 6); @@ -697,14 +624,56 @@ return (offset + res); } +static inline unsigned long +find_next_bit (void * addr, unsigned long size, unsigned long offset) +{ + unsigned long * p = ((unsigned long *) addr) + (offset >> 6); + unsigned long bitvec, reg; + unsigned long set, bit = offset & 63, res; + + if (bit) { + /* + * Look for zero in first word + */ + bitvec = (*p) >> bit; + __asm__(" slgr %0,%0\n" + " ltr %1,%1\n" + " jnz 0f\n" + " aghi %0,32\n" + " srlg %1,%1,32\n" + "0: lghi %2,0xff\n" + " tmll %1,0xffff\n" + " jnz 1f\n" + " aghi %0,16\n" + " srlg %1,%1,16\n" + "1: tmll %1,0x00ff\n" + " jnz 2f\n" + " aghi %0,8\n" + " srlg %1,%1,8\n" + "2: ngr %1,%2\n" + " ic %1,0(%1,%3)\n" + " algr %0,%1" + : "=&d" (set), "+a" (bitvec), "=&d" (reg) + : "a" (&_sb_findmap) : "cc" ); + if (set < (64 - bit)) + return set + offset; + offset += 64 - bit; + p++; + } + /* + * No set bit yet, search remaining full words for a bit + */ + res = find_first_bit (p, size - 64 * (p - (unsigned long *) addr)); + return (offset + res); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
*/ -static __inline__ unsigned long ffz(unsigned long word) +static inline unsigned long ffz(unsigned long word) { - unsigned long reg; - int result; + unsigned long reg, result; __asm__(" lhi %2,-1\n" " slgr %0,%0\n" @@ -730,40 +699,112 @@ } /* + * __ffs = find first bit in word. Undefined if no bit exists, + * so code should check against 0UL first.. + */ +static inline unsigned long __ffs (unsigned long word) +{ + unsigned long reg, result; + + __asm__(" slgr %0,%0\n" + " ltr %1,%1\n" + " jnz 0f\n" + " aghi %0,32\n" + " srlg %1,%1,32\n" + "0: lghi %2,0xff\n" + " tmll %1,0xffff\n" + " jnz 1f\n" + " aghi %0,16\n" + " srlg %1,%1,16\n" + "1: tmll %1,0x00ff\n" + " jnz 2f\n" + " aghi %0,8\n" + " srlg %1,%1,8\n" + "2: ngr %1,%2\n" + " ic %1,0(%1,%3)\n" + " algr %0,%1" + : "=&d" (result), "+a" (word), "=&d" (reg) + : "a" (&_sb_findmap) : "cc" ); + return result; +} + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is cleared. + */ +static inline int sched_find_first_bit(unsigned long *b) +{ + return find_first_bit(b, 140); +} + +/* * ffs: find first bit set. This is defined the same way as * the libc and compiler builtin ffs routines, therefore * differs in spirit from the above ffz (man ffs). */ - -extern int __inline__ ffs (int x) +extern int inline ffs (int x) { - int r; + int r = 1; if (x == 0) - return 0; - __asm__(" slr %0,%0\n" - " tml %1,0xffff\n" + return 0; + __asm__(" tml %1,0xffff\n" " jnz 0f\n" - " ahi %0,16\n" " srl %1,16\n" + " ahi %0,16\n" "0: tml %1,0x00ff\n" " jnz 1f\n" - " ahi %0,8\n" " srl %1,8\n" + " ahi %0,8\n" "1: tml %1,0x000f\n" " jnz 2f\n" - " ahi %0,4\n" " srl %1,4\n" + " ahi %0,4\n" "2: tml %1,0x0003\n" " jnz 3f\n" - " ahi %0,2\n" " srl %1,2\n" + " ahi %0,2\n" "3: tml %1,0x0001\n" " jnz 4f\n" " ahi %0,1\n" "4:" : "=&d" (r), "+d" (x) : : "cc" ); - return r+1; + return r; +} + +/* + * fls: find last bit set. + */ +extern __inline__ int fls(int x) +{ + int r = 32; + + if (x == 0) + return 0; + __asm__(" tmh %1,0xffff\n" + " jz 0f\n" + " sll %1,16\n" + " ahi %0,-16\n" + "0: tmh %1,0xff00\n" + " jz 1f\n" + " sll %1,8\n" + " ahi %0,-8\n" + "1: tmh %1,0xf000\n" + " jz 2f\n" + " sll %1,4\n" + " ahi %0,-4\n" + "2: tmh %1,0xc000\n" + " jz 3f\n" + " sll %1,2\n" + " ahi %0,-2\n" + "3: tmh %1,0x8000\n" + " jz 4f\n" + " ahi %0,-1\n" + "4:" + : "+d" (r), "+d" (x) : : "cc" ); + return r; } /* @@ -791,7 +832,7 @@ #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^56, addr) #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^56, addr) #define ext2_test_bit(nr, addr) test_bit((nr)^56, addr) -static __inline__ unsigned long +static inline unsigned long ext2_find_first_zero_bit(void *vaddr, unsigned long size) { unsigned long res, cmp, count; @@ -833,7 +874,7 @@ return (res < size) ? 
res : size; } -static __inline__ unsigned long +static inline unsigned long ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset) { unsigned long *addr = vaddr; diff -Nur linux-2.4.33-imedia/include/asm-sparc64/system.h linux-2.4.33-imedia-patching/include/asm-sparc64/system.h --- linux-2.4.33-imedia/include/asm-sparc64/system.h 2006-01-11 20:29:28.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/asm-sparc64/system.h 2006-01-26 15:19:43.000000000 +0200 @@ -186,7 +186,11 @@ #define flush_user_windows flushw_user #define flush_register_windows flushw_all -#define prepare_to_switch flushw_all + +#define prepare_arch_schedule(prev) task_lock(prev) +#define finish_arch_schedule(prev) task_unlock(prev) +#define prepare_arch_switch(rq) do { spin_unlock(&(rq)->lock); flushw_all(); } while (0) +#define finish_arch_switch(rq) __sti() #ifndef CONFIG_DEBUG_SPINLOCK #define CHECK_LOCKS(PREV) do { } while(0) diff -Nur linux-2.4.33-imedia/include/linux/brlock.h linux-2.4.33-imedia-patching/include/linux/brlock.h --- linux-2.4.33-imedia/include/linux/brlock.h 2006-01-11 19:27:30.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/brlock.h 2006-01-26 15:19:43.000000000 +0200 @@ -125,11 +125,11 @@ } #else -# define br_read_lock(idx) ((void)(idx)) -# define br_read_unlock(idx) ((void)(idx)) -# define br_write_lock(idx) ((void)(idx)) -# define br_write_unlock(idx) ((void)(idx)) -#endif +# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); }) +# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); }) +#endif /* CONFIG_SMP */ /* * Now enumerate all of the possible sw/hw IRQ protected diff -Nur linux-2.4.33-imedia/include/linux/dcache.h linux-2.4.33-imedia-patching/include/linux/dcache.h --- linux-2.4.33-imedia/include/linux/dcache.h 2006-01-11 19:27:16.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/dcache.h 2006-01-26 15:19:43.000000000 +0200 @@ -127,31 +127,6 @@ extern spinlock_t dcache_lock; -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent - * dentry hashes, so that it won't be found through - * a VFS lookup any more. Note that this is different - * from deleting the dentry - d_delete will try to - * mark the dentry negative if possible, giving a - * successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants - * to invalidate a dentry for some reason (NFS - * timeouts or autofs deletes). - */ - -static __inline__ void d_drop(struct dentry * dentry) -{ - spin_lock(&dcache_lock); - list_del(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_hash); - spin_unlock(&dcache_lock); -} - static __inline__ int dname_external(struct dentry *d) { return d->d_name.name != d->d_iname; @@ -276,3 +251,34 @@ #endif /* __KERNEL__ */ #endif /* __LINUX_DCACHE_H */ + +#if !defined(__LINUX_DCACHE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define __LINUX_DCACHE_H_INLINES + +#ifdef __KERNEL__ +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent + * dentry hashes, so that it won't be found through + * a VFS lookup any more. Note that this is different + * from deleting the dentry - d_delete will try to + * mark the dentry negative if possible, giving a + * successful _negative_ lookup, while d_drop will + * just make the cache lookup fail.
+ * + * d_drop() is used mainly for stuff that wants + * to invalidate a dentry for some reason (NFS + * timeouts or autofs deletes). + */ + +static __inline__ void d_drop(struct dentry * dentry) +{ + spin_lock(&dcache_lock); + list_del(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_hash); + spin_unlock(&dcache_lock); +} +#endif +#endif diff -Nur linux-2.4.33-imedia/include/linux/fs_struct.h linux-2.4.33-imedia-patching/include/linux/fs_struct.h --- linux-2.4.33-imedia/include/linux/fs_struct.h 2001-07-14 01:10:44.000000000 +0300 +++ linux-2.4.33-imedia-patching/include/linux/fs_struct.h 2006-01-26 15:19:43.000000000 +0200 @@ -20,6 +20,15 @@ extern void exit_fs(struct task_struct *); extern void set_fs_altroot(void); +struct fs_struct *copy_fs_struct(struct fs_struct *old); +void put_fs_struct(struct fs_struct *fs); + +#endif +#endif + +#if !defined(_LINUX_FS_STRUCT_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_FS_STRUCT_H_INLINES +#ifdef __KERNEL__ /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. @@ -65,9 +74,5 @@ mntput(old_pwdmnt); } } - -struct fs_struct *copy_fs_struct(struct fs_struct *old); -void put_fs_struct(struct fs_struct *fs); - #endif #endif diff -Nur linux-2.4.33-imedia/include/linux/highmem.h linux-2.4.33-imedia-patching/include/linux/highmem.h --- linux-2.4.33-imedia/include/linux/highmem.h 2006-01-11 19:27:18.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/highmem.h 2006-01-26 15:19:43.000000000 +0200 @@ -33,18 +33,8 @@ { unsigned long addr; - __save_flags(*flags); + local_irq_save(*flags); - /* - * could be low - */ - if (!PageHighMem(bh->b_page)) - return bh->b_data; - - /* - * it's a highmem page - */ - __cli(); addr = (unsigned long) kmap_atomic(bh->b_page, KM_BH_IRQ); if (addr & ~PAGE_MASK) @@ -58,7 +48,7 @@ unsigned long ptr = (unsigned long) buffer & PAGE_MASK; kunmap_atomic((void *) ptr, KM_BH_IRQ); - __restore_flags(*flags); + local_irq_restore(*flags); } #else /* CONFIG_HIGHMEM */ diff -Nur linux-2.4.33-imedia/include/linux/kernel_stat.h linux-2.4.33-imedia-patching/include/linux/kernel_stat.h --- linux-2.4.33-imedia/include/linux/kernel_stat.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/kernel_stat.h 2006-01-26 15:19:43.000000000 +0200 @@ -31,9 +31,10 @@ #elif !defined(CONFIG_ARCH_S390) unsigned int irqs[NR_CPUS][NR_IRQS]; #endif - unsigned int context_swtch; }; +extern unsigned long nr_context_switches(void); + extern struct kernel_stat kstat; extern unsigned long nr_context_switches(void); diff -Nur linux-2.4.33-imedia/include/linux/list.h linux-2.4.33-imedia-patching/include/linux/list.h --- linux-2.4.33-imedia/include/linux/list.h 2006-01-11 19:27:16.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/list.h 2006-01-26 15:19:43.000000000 +0200 @@ -19,6 +19,8 @@ struct list_head *next, *prev; }; +typedef struct list_head list_t; + #define LIST_HEAD_INIT(name) { &(name), &(name) } #define LIST_HEAD(name) \ diff -Nur linux-2.4.33-imedia/include/linux/low-latency.h linux-2.4.33-imedia-patching/include/linux/low-latency.h --- linux-2.4.33-imedia/include/linux/low-latency.h 1970-01-01 02:00:00.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/low-latency.h 2006-01-26 15:19:43.000000000 +0200 @@ -0,0 +1,109 @@ +/* + * include/linux/low-latency.h + * + * Andrew Morton + */ + +#ifndef LOW_LATENCY_H_INCLUDED +#define LOW_LATENCY_H_INCLUDED + +#if defined(CONFIG_LOLAT) +#define LOWLATENCY_NEEDED 1 +#else 
+#define LOWLATENCY_NEEDED 0 +#endif + +#if LOWLATENCY_NEEDED + +#include /* For ____cacheline_aligned */ + +#ifdef CONFIG_LOLAT_SYSCTL +extern struct low_latency_enable_struct { + int yep; +} ____cacheline_aligned __enable_lowlatency; +#define enable_lowlatency __enable_lowlatency.yep + +#else +#define enable_lowlatency 1 +#endif + +/* + * Set this non-zero to generate low-latency instrumentation + */ +#define LOWLATENCY_DEBUG 0 + +/* + * Set this non-zero for robustness testing + */ +#define LOWLATENCY_ALWAYS_SCHEDULE 0 + +#if LOWLATENCY_DEBUG + +#if LOWLATENCY_ALWAYS_SCHEDULE +#define conditional_schedule_needed() ((enable_lowlatency == 2) || (enable_lowlatency && current->need_resched)) +#else +#define conditional_schedule_needed() (enable_lowlatency && current->need_resched) +#endif + +struct lolat_stats_t { + unsigned long count; + int visited; + const char *file; + int line; + struct lolat_stats_t *next; +}; + +void set_running_and_schedule(struct lolat_stats_t *stats); + +#define unconditional_schedule() \ + do { \ + static struct lolat_stats_t stats = { \ + file: __FILE__, \ + line: __LINE__, \ + }; \ + set_running_and_schedule(&stats); \ + } while (0) + +extern void show_lolat_stats(void); + +#else /* LOWLATENCY_DEBUG */ + +#if LOWLATENCY_ALWAYS_SCHEDULE +#define conditional_schedule_needed() 1 +#else +#define conditional_schedule_needed() (current->need_resched) +#endif + +void set_running_and_schedule(void); +#define unconditional_schedule() set_running_and_schedule() + +#endif /* LOWLATENCY_DEBUG */ + +#define conditional_schedule() \ + do { \ + if (conditional_schedule_needed()) \ + unconditional_schedule(); \ + } while (0) + +#define DEFINE_RESCHED_COUNT int resched_count = 0 +#define TEST_RESCHED_COUNT(n) (enable_lowlatency && (++resched_count > (n))) +#define RESET_RESCHED_COUNT() resched_count = 0 +extern int ll_copy_to_user(void *to_user, const void *from, unsigned long len); +extern int ll_copy_from_user(void *to, const void *from_user, unsigned long len); + +#else /* LOWLATENCY_NEEDED */ + +#define conditional_schedule_needed() 0 +#define conditional_schedule() +#define unconditional_schedule() + +#define DEFINE_RESCHED_COUNT +#define TEST_RESCHED_COUNT(n) 0 +#define RESET_RESCHED_COUNT() +#define ll_copy_to_user(to_user, from, len) copy_to_user((to_user), (from), (len)) +#define ll_copy_from_user(to, from_user, len) copy_from_user((to), (from_user), (len)) + +#endif /* LOWLATENCY_NEEDED */ + +#endif /* LOW_LATENCY_H_INCLUDED */ + diff -Nur linux-2.4.33-imedia/include/linux/mm.h linux-2.4.33-imedia-patching/include/linux/mm.h --- linux-2.4.33-imedia/include/linux/mm.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/mm.h 2006-01-26 15:19:43.000000000 +0200 @@ -124,6 +124,8 @@ */ extern pgprot_t protection_map[16]; +/* Actions for zap_page_range() */ +#define ZPR_COND_RESCHED 1 /* Do a conditional_schedule() occasionally */ /* * These are the virtual MM functions - opening of an area, closing and @@ -487,7 +489,7 @@ extern void shmem_lock(struct file * file, int lock); extern int shmem_zero_setup(struct vm_area_struct *); -extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); +extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions); extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); extern int 
zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); diff -Nur linux-2.4.33-imedia/include/linux/reiserfs_fs.h linux-2.4.33-imedia-patching/include/linux/reiserfs_fs.h --- linux-2.4.33-imedia/include/linux/reiserfs_fs.h 2006-01-11 20:35:37.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/reiserfs_fs.h 2006-01-26 15:19:43.000000000 +0200 @@ -1329,8 +1329,8 @@ #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter) #define get_generation(s) atomic_read (&fs_generation(s)) #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) -#define fs_changed(gen,s) (gen != get_generation (s)) - +#define __fs_changed(gen,s) (gen != get_generation (s)) +#define fs_changed(gen,s) ({conditional_schedule(); __fs_changed(gen,s);}) /***************************************************************************/ /* FIXATE NODES */ diff -Nur linux-2.4.33-imedia/include/linux/sched.h linux-2.4.33-imedia-patching/include/linux/sched.h --- linux-2.4.33-imedia/include/linux/sched.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/sched.h 2006-01-26 15:19:43.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include struct exec_domain; @@ -73,16 +74,16 @@ #define CT_TO_SECS(x) ((x) / HZ) #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ) -extern int nr_running, nr_threads; +extern int nr_threads; extern int last_pid; +extern unsigned long nr_running(void); +extern unsigned long nr_uninterruptible(void); #include #include #include #include -#ifdef __KERNEL__ #include -#endif #include @@ -91,6 +92,11 @@ #define TASK_UNINTERRUPTIBLE 2 #define TASK_ZOMBIE 4 #define TASK_STOPPED 8 +#define PREEMPT_ACTIVE 0x4000000 + +#define task_cpu(p) ((p)->cpu) +#define set_task_cpu(p, c) do { (p)->cpu = (c); } while (0) +#define cpu_online(i) ((i) < smp_num_cpus) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -105,15 +111,13 @@ /* * Scheduling policies */ -#define SCHED_OTHER 0 +#define SCHED_NORMAL 0 #define SCHED_FIFO 1 #define SCHED_RR 2 +#define SCHED_BATCH 3 -/* - * This is an additional bit set when we want to - * yield the CPU for one re-schedule.. - */ -#define SCHED_YIELD 0x10 +/* compatibility */ +#define SCHED_OTHER SCHED_NORMAL struct sched_param { int sched_priority; @@ -132,21 +136,30 @@ * a separate lock). 
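+ *
+ * (With the O(1) scheduler there is no global runqueue_lock any
+ * more: each CPU's runqueue carries its own rq->lock - see the
+ * runqueue locking comments in kernel/sched.c.)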
*/ extern rwlock_t tasklist_lock; -extern spinlock_t runqueue_lock; extern spinlock_t mmlist_lock; +typedef struct task_struct task_t; + extern void sched_init(void); -extern void init_idle(void); +extern void init_idle(task_t *idle, int cpu); extern void show_state(void); extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); -extern void update_one_process(struct task_struct *p, unsigned long user, +extern void update_one_process(task_t *p, unsigned long user, unsigned long system, int cpu); +extern void scheduler_tick(int user_tick, int system); +extern void migration_init(void); +extern unsigned long cache_decay_ticks; +extern int set_user(uid_t new_ruid, int dumpclear); #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); asmlinkage void schedule(void); +asmlinkage void schedule_userspace(void); +#ifdef CONFIG_PREEMPT +asmlinkage void preempt_schedule(void); +#endif extern int schedule_task(struct tq_struct *task); extern void flush_scheduled_tasks(void); @@ -160,6 +173,36 @@ #endif /* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are + * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values + * are inverted: lower p->prio value means higher priority. + * + * The MAX_RT_USER_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + */ + +#define MAX_USER_RT_PRIO 100 +#define MAX_RT_PRIO MAX_USER_RT_PRIO + +#define MAX_PRIO (MAX_RT_PRIO + 40) + +/* + * The maximum RT priority is configurable. If the resulting + * bitmap is 160-bits , we can use a hand-coded routine which + * is optimal. Otherwise, we fall back on a generic routine for + * finding the first set bit from an arbitrarily-sized bitmap. + */ +#if MAX_PRIO < 160 && MAX_PRIO > 127 +#define sched_find_first_bit(map) _sched_find_first_bit(map) +#else +#define sched_find_first_bit(map) find_first_bit(map, MAX_PRIO) +#endif + +/* * The default fd array needs to be at least BITS_PER_LONG, * as this is the granularity returned by copy_fdset(). */ @@ -280,12 +323,14 @@ extern struct user_struct root_user; #define INIT_USER (&root_user) +typedef struct prio_array prio_array_t; + struct task_struct { /* * offsets of these are hardcoded elsewhere - touch with care */ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - unsigned long flags; /* per process flags, defined below */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ int sigpending; mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead @@ -297,36 +342,30 @@ int lock_depth; /* Lock depth */ -/* - * offset 32 begins here on 32-bit platforms. We keep - * all fields in a single cacheline that are needed for - * the goodness() loop in schedule(). - */ - long counter; - long nice; - unsigned long policy; - struct mm_struct *mm; - int processor; - /* - * cpus_runnable is ~0 if the process is not running on any - * CPU. It's (1 << cpu) if it's running on a CPU. This mask - * is updated under the runqueue lock. - * - * To determine whether a process might run on a CPU, this - * mask is AND-ed with cpus_allowed. - */ - unsigned long cpus_runnable, cpus_allowed; /* - * (only the 'next' pointer fits into the cacheline, but - * that's just fine.) 
+ * offset 32 begins here on 32-bit platforms. */ - struct list_head run_list; - unsigned long sleep_time; + unsigned int cpu; + int prio, static_prio; + list_t run_list; + prio_array_t *array; + + unsigned long sleep_avg; + long interactive_credit; + unsigned long timestamp; + int activated; - struct task_struct *next_task, *prev_task; - struct mm_struct *active_mm; + unsigned long policy; + unsigned long cpus_allowed; + unsigned int time_slice, first_time_slice; + + task_t *next_task, *prev_task; + + struct mm_struct *mm, *active_mm; struct list_head local_pages; + unsigned int allocation_order, nr_local_pages; + unsigned long flags; /* task state */ struct linux_binfmt *binfmt; @@ -348,12 +387,12 @@ * older sibling, respectively. (p->father can be replaced with * p->p_pptr->pid) */ - struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr; + task_t *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr; struct list_head thread_group; /* PID hash table linkage. */ - struct task_struct *pidhash_next; - struct task_struct **pidhash_pprev; + task_t *pidhash_next; + task_t **pidhash_pprev; wait_queue_head_t wait_chldexit; /* for wait4() */ struct completion *vfork_done; /* for vfork() */ @@ -433,6 +472,8 @@ #define PF_FREE_PAGES 0x00002000 /* per process page freeing */ #define PF_NOIO 0x00004000 /* avoid generating further I/O */ #define PF_FSTRANS 0x00008000 /* inside a filesystem transaction */ +#define PF_BATCH 0x00080000 /* batch-priority process */ + #define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ @@ -454,9 +495,16 @@ */ #define _STK_LIM (8*1024*1024) -#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */ -#define MAX_COUNTER (20*HZ/100) -#define DEF_NICE (0) +#if CONFIG_SMP +extern void set_cpus_allowed(task_t *p, unsigned long new_mask); +#else +#define set_cpus_allowed(p, new_mask) do { } while (0) +#endif + +extern void set_user_nice(task_t *p, long nice); +extern int task_prio(task_t *p); +extern int task_nice(task_t *p); +extern int idle_cpu(int cpu); extern void yield(void); @@ -477,14 +525,14 @@ addr_limit: KERNEL_DS, \ exec_domain: &default_exec_domain, \ lock_depth: -1, \ - counter: DEF_COUNTER, \ - nice: DEF_NICE, \ - policy: SCHED_OTHER, \ + prio: MAX_PRIO-20, \ + static_prio: MAX_PRIO-20, \ + policy: SCHED_NORMAL, \ + cpus_allowed: -1, \ mm: NULL, \ active_mm: &init_mm, \ - cpus_runnable: ~0UL, \ - cpus_allowed: ~0UL, \ run_list: LIST_HEAD_INIT(tsk.run_list), \ + time_slice: HZ, \ next_task: &tsk, \ prev_task: &tsk, \ p_opptr: &tsk, \ @@ -518,24 +566,24 @@ #endif union task_union { - struct task_struct task; + task_t task; unsigned long stack[INIT_TASK_SIZE/sizeof(long)]; }; extern union task_union init_task_union; extern struct mm_struct init_mm; -extern struct task_struct *init_tasks[NR_CPUS]; +extern task_t *init_tasks[NR_CPUS]; /* PID hashing. (shouldnt this be dynamic?) 
*/ #define PIDHASH_SZ (4096 >> 2) -extern struct task_struct *pidhash[PIDHASH_SZ]; +extern task_t *pidhash[PIDHASH_SZ]; #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) -static inline void hash_pid(struct task_struct *p) +static inline void hash_pid(task_t *p) { - struct task_struct **htable = &pidhash[pid_hashfn(p->pid)]; + task_t **htable = &pidhash[pid_hashfn(p->pid)]; if((p->pidhash_next = *htable) != NULL) (*htable)->pidhash_pprev = &p->pidhash_next; @@ -543,16 +591,16 @@ p->pidhash_pprev = htable; } -static inline void unhash_pid(struct task_struct *p) +static inline void unhash_pid(task_t *p) { if(p->pidhash_next) p->pidhash_next->pidhash_pprev = p->pidhash_pprev; *p->pidhash_pprev = p->pidhash_next; } -static inline struct task_struct *find_task_by_pid(int pid) +static inline task_t *find_task_by_pid(int pid) { - struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)]; + task_t *p, **htable = &pidhash[pid_hashfn(pid)]; for(p = *htable; p && p->pid != pid; p = p->pidhash_next) ; @@ -560,19 +608,6 @@ return p; } -#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL) - -static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu) -{ - tsk->processor = cpu; - tsk->cpus_runnable = 1UL << cpu; -} - -static inline void task_release_cpu(struct task_struct *tsk) -{ - tsk->cpus_runnable = ~0UL; -} - /* per-UID process charging. */ extern struct user_struct * alloc_uid(uid_t); extern void free_uid(struct user_struct *); @@ -600,47 +635,51 @@ extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q)); extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout)); -extern int FASTCALL(wake_up_process(struct task_struct * tsk)); +extern int FASTCALL(wake_up_process(task_t * tsk)); +extern void FASTCALL(wake_up_forked_process(task_t * tsk)); +extern void FASTCALL(sched_exit(task_t * p)); #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) #define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0) -#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) -#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) #define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr) #define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0) -#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1) -#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr) +#ifdef CONFIG_SMP +#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1) +#else +#define wake_up_interruptible_sync(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) +#endif + asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *); +extern void flush_signals(task_t *); +extern void flush_signal_handlers(task_t *); extern void sig_exit(int, int, struct siginfo *); extern int dequeue_signal(sigset_t *, siginfo_t *); extern void block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask); extern void unblock_all_signals(void); -extern int send_sig_info(int, 
struct siginfo *, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int send_sig_info(int, struct siginfo *, task_t *); +extern int force_sig_info(int, struct siginfo *, task_t *); extern int kill_pg_info(int, struct siginfo *, pid_t); extern int kill_sl_info(int, struct siginfo *, pid_t); extern int kill_proc_info(int, struct siginfo *, pid_t); -extern void notify_parent(struct task_struct *, int); -extern void do_notify_parent(struct task_struct *, int); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); +extern void notify_parent(task_t *, int); +extern void do_notify_parent(task_t *, int); +extern void force_sig(int, task_t *); +extern int send_sig(int, task_t *, int); extern int kill_pg(pid_t, int, int); extern int kill_sl(pid_t, int, int); extern int kill_proc(pid_t, int, int); extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *); extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long); -static inline int signal_pending(struct task_struct *p) +static inline int signal_pending(task_t *p) { return (p->sigpending != 0); } @@ -679,7 +718,7 @@ This is required every time the blocked sigset_t changes. All callers should have t->sigmask_lock. */ -static inline void recalc_sigpending(struct task_struct *t) +static inline void recalc_sigpending(task_t *t) { t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked); } @@ -786,16 +825,17 @@ extern int expand_fdset(struct files_struct *, int nr); extern void free_fdset(fd_set *, int); -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); +extern int copy_thread(int, unsigned long, unsigned long, unsigned long, task_t *, struct pt_regs *); extern void flush_thread(void); extern void exit_thread(void); -extern void exit_mm(struct task_struct *); -extern void exit_files(struct task_struct *); -extern void exit_sighand(struct task_struct *); +extern void exit_mm(task_t *); +extern void exit_files(task_t *); +extern void exit_sighand(task_t *); extern void reparent_to_init(void); extern void daemonize(void); +extern task_t *child_reaper; extern int do_execve(char *, char **, char **, struct pt_regs *); extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long); @@ -809,6 +849,9 @@ extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); +extern void wait_task_inactive(task_t * p); +extern void kick_if_running(task_t * p); + #define __wait_event(wq, condition) \ do { \ wait_queue_t __wait; \ @@ -890,27 +933,12 @@ for (task = next_thread(current) ; task != current ; task = next_thread(task)) #define next_thread(p) \ - list_entry((p)->thread_group.next, struct task_struct, thread_group) + list_entry((p)->thread_group.next, task_t, thread_group) #define thread_group_leader(p) (p->pid == p->tgid) -static inline void del_from_runqueue(struct task_struct * p) -{ - nr_running--; - p->sleep_time = jiffies; - list_del(&p->run_list); - p->run_list.next = NULL; -} - -static inline int task_on_runqueue(struct task_struct *p) -{ - return (p->run_list.next != NULL); -} - -static inline void unhash_process(struct task_struct *p) +static inline void unhash_process(task_t *p) { - if (task_on_runqueue(p)) - out_of_line_bug(); write_lock_irq(&tasklist_lock); nr_threads--; unhash_pid(p); @@ -920,12 +948,12 @@ } /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). 
Nests inside tasklist_lock */ -static inline void task_lock(struct task_struct *p) +static inline void task_lock(task_t *p) { spin_lock(&p->alloc_lock); } -static inline void task_unlock(struct task_struct *p) +static inline void task_unlock(task_t *p) { spin_unlock(&p->alloc_lock); } @@ -961,5 +989,30 @@ __cond_resched(); } +static inline void set_need_resched(void) +{ + current->need_resched = 1; +} + +static inline void clear_need_resched(void) +{ + current->need_resched = 0; +} + +static inline void set_tsk_need_resched(struct task_struct *tsk) +{ + tsk->need_resched = 1; +} + +static inline void clear_tsk_need_resched(struct task_struct *tsk) +{ + tsk->need_resched = 0; +} + +#define _TASK_STRUCT_DEFINED +#include +#include +#include + #endif /* __KERNEL__ */ #endif diff -Nur linux-2.4.33-imedia/include/linux/smp.h linux-2.4.33-imedia-patching/include/linux/smp.h --- linux-2.4.33-imedia/include/linux/smp.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/smp.h 2006-01-26 15:19:43.000000000 +0200 @@ -81,11 +81,21 @@ #define smp_processor_id() 0 #define hard_smp_processor_id() 0 #define smp_threads_ready 1 +#ifndef CONFIG_PREEMPT #define kernel_lock() +#endif #define cpu_logical_map(cpu) 0 #define cpu_number_map(cpu) 0 #define smp_call_function(func,info,retry,wait) ({ 0; }) #define cpu_online_map 1 +static inline void smp_send_reschedule(int cpu) { } +static inline void smp_send_reschedule_all(void) { } #endif + +/* + * Common definitions: + */ +#define cpu() smp_processor_id() + #endif diff -Nur linux-2.4.33-imedia/include/linux/smp_lock.h linux-2.4.33-imedia-patching/include/linux/smp_lock.h --- linux-2.4.33-imedia/include/linux/smp_lock.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/smp_lock.h 2006-01-26 15:19:43.000000000 +0200 @@ -3,7 +3,7 @@ #include -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT) #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) diff -Nur linux-2.4.33-imedia/include/linux/spinlock.h linux-2.4.33-imedia-patching/include/linux/spinlock.h --- linux-2.4.33-imedia/include/linux/spinlock.h 2006-01-11 19:27:16.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/spinlock.h 2006-01-26 15:19:43.000000000 +0200 @@ -2,6 +2,7 @@ #define __LINUX_SPINLOCK_H #include +#include #include @@ -64,8 +65,10 @@ #if (DEBUG_SPINLOCKS < 1) +#ifndef CONFIG_PREEMPT #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) #define ATOMIC_DEC_AND_LOCK +#endif /* * Your basic spinlocks, allowing only a single CPU anywhere @@ -82,11 +85,11 @@ #endif #define spin_lock_init(lock) do { } while(0) -#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". */ #define spin_is_locked(lock) (0) -#define spin_trylock(lock) ({1; }) +#define _raw_spin_trylock(lock) ({1; }) #define spin_unlock_wait(lock) do { } while(0) -#define spin_unlock(lock) do { } while(0) +#define _raw_spin_unlock(lock) do { } while(0) #elif (DEBUG_SPINLOCKS < 2) @@ -146,13 +149,78 @@ #endif #define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) (void)(lock) /* Not "unused variable". */ -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) +#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". 
*/ +#define _raw_read_unlock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_write_unlock(lock) do { } while(0) #endif /* !SMP */ +#ifdef CONFIG_PREEMPT + +#define preempt_get_count() (current->preempt_count) +#define preempt_is_disabled() (preempt_get_count() != 0) + +#define preempt_disable() \ +do { \ + ++current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched() \ +do { \ + --current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable() \ +do { \ + --current->preempt_count; \ + barrier(); \ + if (unlikely(current->preempt_count < current->need_resched)) \ + preempt_schedule(); \ +} while (0) + +#define spin_lock(lock) \ +do { \ + preempt_disable(); \ + _raw_spin_lock(lock); \ +} while(0) + +#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);}) +#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);}) +#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable();_raw_write_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) + +#else + +#define preempt_get_count() (0) +#define preempt_is_disabled() (1) +#define preempt_disable() do { } while (0) +#define preempt_enable_no_resched() do {} while(0) +#define preempt_enable() do { } while (0) + +#define spin_lock(lock) _raw_spin_lock(lock) +#define spin_trylock(lock) _raw_spin_trylock(lock) +#define spin_unlock(lock) _raw_spin_unlock(lock) + +#define read_lock(lock) _raw_read_lock(lock) +#define read_unlock(lock) _raw_read_unlock(lock) +#define write_lock(lock) _raw_write_lock(lock) +#define write_unlock(lock) _raw_write_unlock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) +#endif + /* "lock on reference count zero" */ #ifndef ATOMIC_DEC_AND_LOCK #include diff -Nur linux-2.4.33-imedia/include/linux/sysctl.h linux-2.4.33-imedia-patching/include/linux/sysctl.h --- linux-2.4.33-imedia/include/linux/sysctl.h 2006-01-11 20:29:28.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/sysctl.h 2006-01-26 15:19:43.000000000 +0200 @@ -125,6 +125,7 @@ KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ KERN_TAINTED=53, /* int: various kernel tainted flags */ KERN_CADPID=54, /* int: PID of the process to notify on CAD */ + KERN_LOWLATENCY=55, /* int: enable low latency scheduling */ KERN_CORE_PATTERN=56, /* string: pattern for core-files */ KERN_PPC_L3CR=57, /* l3cr register on PPC */ KERN_EXCEPTION_TRACE=58, /* boolean: exception trace */ diff -Nur linux-2.4.33-imedia/include/linux/tqueue.h linux-2.4.33-imedia-patching/include/linux/tqueue.h --- linux-2.4.33-imedia/include/linux/tqueue.h 2006-01-11 19:27:17.000000000 +0200 +++ linux-2.4.33-imedia-patching/include/linux/tqueue.h 2006-01-26 15:19:43.000000000 +0200 @@ -94,6 +94,22 @@ extern spinlock_t tqueue_lock; /* + * Call all "bottom halfs" on a given list. 
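+ * (run_task_queue() below is the cheap inline path: it only calls
+ * into __run_task_queue() when TQ_ACTIVE() reports the list is
+ * non-empty.)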
+ */ + +extern void __run_task_queue(task_queue *list); + +static inline void run_task_queue(task_queue *list) +{ + if (TQ_ACTIVE(*list)) + __run_task_queue(list); +} + +#endif /* _LINUX_TQUEUE_H */ + +#if !defined(_LINUX_TQUEUE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_TQUEUE_H_INLINES +/* * Queue a task on a tq. Return non-zero if it was successfully * added. */ @@ -109,17 +125,4 @@ } return ret; } - -/* - * Call all "bottom halfs" on a given list. - */ - -extern void __run_task_queue(task_queue *list); - -static inline void run_task_queue(task_queue *list) -{ - if (TQ_ACTIVE(*list)) - __run_task_queue(list); -} - -#endif /* _LINUX_TQUEUE_H */ +#endif diff -Nur linux-2.4.33-imedia/init/main.c linux-2.4.33-imedia-patching/init/main.c --- linux-2.4.33-imedia/init/main.c 2004-11-17 13:54:22.000000000 +0200 +++ linux-2.4.33-imedia-patching/init/main.c 2006-01-26 15:19:43.000000000 +0200 @@ -298,8 +298,6 @@ extern void setup_arch(char **); extern void cpu_idle(void); -unsigned long wait_init_idle; - #ifndef CONFIG_SMP #ifdef CONFIG_X86_LOCAL_APIC @@ -313,29 +311,19 @@ #else - /* Called by boot processor to activate the rest. */ static void __init smp_init(void) { /* Get other processors into their bootup holding patterns. */ smp_boot_cpus(); - wait_init_idle = cpu_online_map; - clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */ smp_threads_ready=1; smp_commence(); - - /* Wait for the other cpus to set up their idle processes */ - printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle); - while (wait_init_idle) { - cpu_relax(); - barrier(); - } - printk("All processors have done init_idle\n"); } #endif + /* * We need to finalize in a non-__init function or else race conditions * between the root thread and the init thread may cause start_kernel to @@ -347,7 +335,6 @@ { kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL); unlock_kernel(); - current->need_resched = 1; cpu_idle(); } @@ -434,6 +421,7 @@ acpi_early_init(); /* before LAPIC and SMP init */ printk("POSIX conformance testing by UNIFIX\n"); + init_idle(current, smp_processor_id()); /* * We count on the initial thread going ok * Like idlers init is an unlocked kernel thread, which will @@ -471,6 +459,10 @@ */ static void __init do_basic_setup(void) { + /* Start the per-CPU migration threads */ +#if CONFIG_SMP + migration_init(); +#endif /* * Tell the world that we're going to be the grim diff -Nur linux-2.4.33-imedia/kernel/capability.c linux-2.4.33-imedia-patching/kernel/capability.c --- linux-2.4.33-imedia/kernel/capability.c 2000-06-24 07:06:37.000000000 +0300 +++ linux-2.4.33-imedia-patching/kernel/capability.c 2006-01-26 15:19:43.000000000 +0200 @@ -8,6 +8,8 @@ #include #include +unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ + kernel_cap_t cap_bset = CAP_INIT_EFF_SET; /* Note: never hold tasklist_lock while spinning for this one */ diff -Nur linux-2.4.33-imedia/kernel/exit.c linux-2.4.33-imedia-patching/kernel/exit.c --- linux-2.4.33-imedia/kernel/exit.c 2002-11-29 01:53:15.000000000 +0200 +++ linux-2.4.33-imedia-patching/kernel/exit.c 2006-01-26 15:19:43.000000000 +0200 @@ -28,49 +28,22 @@ static void release_task(struct task_struct * p) { - if (p != current) { + if (p == current) + BUG(); #ifdef CONFIG_SMP - /* - * Wait to make sure the process isn't on the - * runqueue (active on some other CPU still) - */ - for (;;) { - task_lock(p); - if (!task_has_cpu(p)) - break; - task_unlock(p); - do { - cpu_relax(); - barrier(); - } while 
(task_has_cpu(p)); - } - task_unlock(p); + wait_task_inactive(p); #endif - atomic_dec(&p->user->processes); - free_uid(p->user); - unhash_process(p); - - release_thread(p); - current->cmin_flt += p->min_flt + p->cmin_flt; - current->cmaj_flt += p->maj_flt + p->cmaj_flt; - current->cnswap += p->nswap + p->cnswap; - /* - * Potentially available timeslices are retrieved - * here - this way the parent does not get penalized - * for creating too many processes. - * - * (this cannot be used to artificially 'generate' - * timeslices, because any timeslice recovered here - * was given away by the parent in the first place.) - */ - current->counter += p->counter; - if (current->counter >= MAX_COUNTER) - current->counter = MAX_COUNTER; - p->pid = 0; - free_task_struct(p); - } else { - printk("task releasing itself\n"); - } + atomic_dec(&p->user->processes); + free_uid(p->user); + unhash_process(p); + + release_thread(p); + current->cmin_flt += p->min_flt + p->cmin_flt; + current->cmaj_flt += p->maj_flt + p->cmaj_flt; + current->cnswap += p->nswap + p->cnswap; + sched_exit(p); + p->pid = 0; + free_task_struct(p); } /* @@ -150,6 +123,79 @@ return retval; } +/** + * reparent_to_init() - Reparent the calling kernel thread to the init task. + * + * If a kernel thread is launched as a result of a system call, or if + * it ever exits, it should generally reparent itself to init so that + * it is correctly cleaned up on exit. + * + * The various task state such as scheduling policy and priority may have + * been inherited from a user process, so we reset them to sane values here. + * + * NOTE that reparent_to_init() gives the caller full capabilities. + */ +void reparent_to_init(void) +{ + write_lock_irq(&tasklist_lock); + + /* Reparent to init */ + REMOVE_LINKS(current); + current->p_pptr = child_reaper; + current->p_opptr = child_reaper; + SET_LINKS(current); + + /* Set the exit signal to SIGCHLD so we signal init on exit */ + current->exit_signal = SIGCHLD; + + current->ptrace = 0; + if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0)) + set_user_nice(current, 0); + /* cpus_allowed? */ + /* rt_priority? */ + /* signals? */ + current->cap_effective = CAP_INIT_EFF_SET; + current->cap_inheritable = CAP_INIT_INH_SET; + current->cap_permitted = CAP_FULL_SET; + current->keep_capabilities = 0; + memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim))); + current->user = INIT_USER; + + write_unlock_irq(&tasklist_lock); +} + +/* + * Put all the gunge required to become a kernel thread without + * attached user resources in one place where it belongs. + */ + +void daemonize(void) +{ + struct fs_struct *fs; + + + /* + * If we were started as result of loading a module, close all of the + * user space pages. We don't need them, and if we didn't close them + * they would be locked into memory. + */ + exit_mm(current); + + current->session = 1; + current->pgrp = 1; + current->tty = NULL; + + /* Become as one with the init task */ + + exit_fs(current); /* current->fs->count--; */ + fs = init_task.fs; + current->fs = fs; + atomic_inc(&fs->count); + exit_files(current); + current->files = init_task.files; + atomic_inc(¤t->files->count); + } + /* * When we die, we re-parent all our children. 
* Try to give them to another thread in our thread @@ -196,6 +242,7 @@ } i++; set >>= 1; + conditional_schedule(); /* sys_exit, many files open */ } } } @@ -282,7 +329,9 @@ current->mm = NULL; /* active_mm is still 'mm' */ atomic_inc(&mm->mm_count); + preempt_disable(); enter_lazy_tlb(mm, current, smp_processor_id()); + preempt_enable(); return mm; } @@ -313,8 +362,8 @@ /* more a memory barrier than a real lock */ task_lock(tsk); tsk->mm = NULL; - task_unlock(tsk); enter_lazy_tlb(mm, current, smp_processor_id()); + task_unlock(tsk); mmput(mm); } } @@ -435,6 +484,13 @@ tsk->flags |= PF_EXITING; del_timer_sync(&tsk->real_timer); +#if 0 + if (unlikely(preempt_get_count())) + printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", + current->comm, current->pid, + preempt_get_count()); +#endif + fake_volatile: #ifdef CONFIG_BSD_PROCESS_ACCT acct_process(code); diff -Nur linux-2.4.33-imedia/kernel/fork.c linux-2.4.33-imedia-patching/kernel/fork.c --- linux-2.4.33-imedia/kernel/fork.c 2005-01-19 16:10:13.000000000 +0200 +++ linux-2.4.33-imedia-patching/kernel/fork.c 2006-01-26 15:19:43.000000000 +0200 @@ -31,7 +31,6 @@ /* The idle threads do not count.. */ int nr_threads; -int nr_running; int max_threads; unsigned long total_forks; /* Handle normal Linux uptimes. */ @@ -39,6 +38,8 @@ struct task_struct *pidhash[PIDHASH_SZ]; +rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ + void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait) { unsigned long flags; @@ -700,6 +701,13 @@ if (p->binfmt && p->binfmt->module) __MOD_INC_USE_COUNT(p->binfmt->module); +#ifdef CONFIG_PREEMPT + /* + * Continue with preemption disabled as part of the context + * switch, so start with preempt_count set to 1. + */ + p->preempt_count = 1; +#endif p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; @@ -709,8 +717,7 @@ if (p->pid == 0 && current->pid != 0) goto bad_fork_cleanup; - p->run_list.next = NULL; - p->run_list.prev = NULL; + INIT_LIST_HEAD(&p->run_list); p->p_cptr = NULL; init_waitqueue_head(&p->wait_chldexit); @@ -736,14 +743,15 @@ #ifdef CONFIG_SMP { int i; - p->cpus_runnable = ~0UL; - p->processor = current->processor; + /* ?? should we just memset this ?? */ for(i = 0; i < smp_num_cpus; i++) - p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0; + p->per_cpu_utime[cpu_logical_map(i)] = + p->per_cpu_stime[cpu_logical_map(i)] = 0; spin_lock_init(&p->sigmask_lock); } #endif + p->array = NULL; p->lock_depth = -1; /* -1 = no lock */ p->start_time = jiffies; @@ -778,15 +786,27 @@ p->pdeath_signal = 0; /* - * "share" dynamic priority between parent and child, thus the - * total amount of dynamic priorities in the system doesn't change, - * more scheduling fairness. This is only important in the first - * timeslice, on the long run the scheduling behaviour is unchanged. - */ - p->counter = (current->counter + 1) >> 1; - current->counter >>= 1; - if (!current->counter) - current->need_resched = 1; + * Share the timeslice between parent and child, thus the + * total amount of pending timeslices in the system doesnt change, + * resulting in more scheduling fairness. + */ + __cli(); + if (!current->time_slice) + BUG(); + p->time_slice = (current->time_slice + 1) >> 1; + p->first_time_slice = 1; + current->time_slice >>= 1; + if (!current->time_slice) { + /* + * This case is rare, it happens when the parent has only + * a single jiffy left from its timeslice. Taking the + * runqueue lock is not a problem. 
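+ *
+ * (In the common case just above, a parent with e.g. 11 ticks
+ * left hands (11 + 1) >> 1 = 6 to the child and keeps 11 >> 1 = 5;
+ * the sum never grows, so fork() cannot be used to manufacture
+ * extra CPU time. Here the single remaining tick is charged via
+ * scheduler_tick() so the normal end-of-slice accounting runs
+ * instead of leaving the parent with a zero-length slice.)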
+ */ + current->time_slice = 1; + scheduler_tick(0,0); + } + p->timestamp = jiffies; + __sti(); /* * Ok, add it to the run-queues and make it @@ -823,10 +843,16 @@ if (p->ptrace & PT_PTRACED) send_sig(SIGSTOP, p, 1); - wake_up_process(p); /* do this last */ + wake_up_forked_process(p); /* do this last */ ++total_forks; if (clone_flags & CLONE_VFORK) wait_for_completion(&vfork); + else + /* + * Let the child process run first, to avoid most of the + * COW overhead when the child exec()s afterwards. + */ + current->need_resched = 1; fork_out: return retval; diff -Nur linux-2.4.33-imedia/kernel/ksyms.c linux-2.4.33-imedia-patching/kernel/ksyms.c --- linux-2.4.33-imedia/kernel/ksyms.c 2004-02-18 15:36:32.000000000 +0200 +++ linux-2.4.33-imedia-patching/kernel/ksyms.c 2006-01-26 15:19:43.000000000 +0200 @@ -461,30 +461,44 @@ /* process management */ EXPORT_SYMBOL(complete_and_exit); EXPORT_SYMBOL(__wake_up); -EXPORT_SYMBOL(__wake_up_sync); EXPORT_SYMBOL(wake_up_process); EXPORT_SYMBOL(sleep_on); EXPORT_SYMBOL(sleep_on_timeout); EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(preempt_schedule); +#endif EXPORT_SYMBOL(schedule_timeout); #if CONFIG_SMP EXPORT_SYMBOL(set_cpus_allowed); +EXPORT_SYMBOL(__wake_up_sync); #endif EXPORT_SYMBOL(yield); +EXPORT_SYMBOL(set_user_nice); +EXPORT_SYMBOL(task_nice); +EXPORT_SYMBOL_GPL(idle_cpu); EXPORT_SYMBOL(__cond_resched); EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(xtime); EXPORT_SYMBOL(do_gettimeofday); EXPORT_SYMBOL(do_settimeofday); +#if LOWLATENCY_NEEDED +EXPORT_SYMBOL(set_running_and_schedule); +#ifdef CONFIG_LOLAT_SYSCTL +EXPORT_SYMBOL(__enable_lowlatency); +#endif +#endif + #if !defined(__ia64__) EXPORT_SYMBOL(loops_per_jiffy); #endif EXPORT_SYMBOL(kstat); EXPORT_SYMBOL(nr_running); +EXPORT_SYMBOL(nr_context_switches); /* misc */ EXPORT_SYMBOL(panic); diff -Nur linux-2.4.33-imedia/kernel/module.c linux-2.4.33-imedia-patching/kernel/module.c --- linux-2.4.33-imedia/kernel/module.c 2003-08-25 14:44:44.000000000 +0300 +++ linux-2.4.33-imedia-patching/kernel/module.c 2006-01-26 15:19:43.000000000 +0200 @@ -1187,6 +1187,11 @@ return ERR_PTR(-ENOMEM); lock_kernel(); for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) { +#if 0 + /* We can't actually do this, because we'd create a + * race against module unload. Need a semaphore. */ + conditional_schedule(); +#endif if (n < v->nsyms) { p->mod = v; p->index = n; diff -Nur linux-2.4.33-imedia/kernel/ptrace.c linux-2.4.33-imedia-patching/kernel/ptrace.c --- linux-2.4.33-imedia/kernel/ptrace.c 2006-01-11 20:29:28.000000000 +0200 +++ linux-2.4.33-imedia-patching/kernel/ptrace.c 2006-01-26 15:19:43.000000000 +0200 @@ -32,20 +32,7 @@ if (child->state != TASK_STOPPED) return -ESRCH; #ifdef CONFIG_SMP - /* Make sure the child gets off its CPU.. 
*/ - for (;;) { - task_lock(child); - if (!task_has_cpu(child)) - break; - task_unlock(child); - do { - if (child->state != TASK_STOPPED) - return -ESRCH; - barrier(); - cpu_relax(); - } while (task_has_cpu(child)); - } - task_unlock(child); + wait_task_inactive(child); #endif } diff -Nur linux-2.4.33-imedia/kernel/sched.c linux-2.4.33-imedia-patching/kernel/sched.c --- linux-2.4.33-imedia/kernel/sched.c 2004-11-17 13:54:22.000000000 +0200 +++ linux-2.4.33-imedia-patching/kernel/sched.c 2006-01-26 15:19:43.000000000 +0200 @@ -1,341 +1,558 @@ /* - * linux/kernel/sched.c + * kernel/sched.c * * Kernel scheduler and related syscalls * - * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1991-2002 Linus Torvalds * * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and * make semaphores SMP safe * 1998-11-19 Implemented schedule_timeout() and related stuff * by Andrea Arcangeli * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar + * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: + * hybrid priority-list and round-robin design with + * an array-switch method of distributing timeslices + * and per-CPU runqueues. Additional code by Davide + * Libenzi, Robert Love, and Rusty Russell. */ -/* - * 'sched.c' is the main kernel file. It contains scheduling primitives - * (sleep_on, wakeup, schedule etc) as well as a number of simple system - * call functions (type getpid()), which just extract a field from - * current-task - */ - -#include #include +#include #include +#include +#include #include -#include +#include #include -#include #include -#include -#include - -#include -#include - -extern void timer_bh(void); -extern void tqueue_bh(void); -extern void immediate_bh(void); +#include /* - * scheduler variables - */ + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], + * and back. + */ +#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) +#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) +#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) -unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ +/* + * 'User priority' is the nice value converted to something we + * can work with better when scaling various scheduler parameters, + * it's a [ 0 ... 39 ] range. + */ +#define USER_PRIO(p) ((p)-MAX_RT_PRIO) +#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) +#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) +#define AVG_TIMESLICE (MAX_TIMESLICE * MAX_PRIO/MAX_USER_PRIO) -extern void mem_use(void); +/* + * These are the 'tuning knobs' of the scheduler: + * + * Minimum timeslice is 10 msecs, default timeslice is 100 msecs, + * maximum timeslice is 200 msecs. Timeslices get refilled after + * they expire. + */ +#define MIN_TIMESLICE ((10 * HZ) / 1000 ?: 1) +#define MAX_TIMESLICE ((200 * HZ) / 1000 ?: 1) +#define ON_RUNQUEUE_WEIGHT 30 +#define CHILD_PENALTY 95 +#define PARENT_PENALTY 100 +#define EXIT_WEIGHT 3 +#define PRIO_BONUS_RATIO 25 +#define INTERACTIVE_DELTA 2 +#define CREDIT_LIMIT 100 +#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) +#define MAX_SLEEP_AVG (AVG_TIMESLICE * MAX_BONUS) +#define STARVATION_LIMIT (MAX_SLEEP_AVG) /* - * Scheduling quanta. + * If a task is 'interactive' then we reinsert it in the active + * array after it has expired its current timeslice. (it will not + * continue to run immediately, it will still roundrobin with + * other interactive tasks.) + * + * This part scales the interactivity limit depending on niceness. * - * NOTE! 
The unix "nice" value influences how long a process - * gets. The nice value ranges from -20 to +19, where a -20 - * is a "high-priority" task, and a "+10" is a low-priority - * task. + * We scale it linearly, offset by the INTERACTIVE_DELTA delta. + * Here are a few examples of different nice levels: * - * We want the time-slice to be around 50ms or so, so this - * calculation depends on the value of HZ. + * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0] + * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0] + * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0] + * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0] + * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0] + * + * (the X axis represents the possible -5 ... 0 ... +5 dynamic + * priority range a task can explore, a value of '1' means the + * task is rated interactive.) + * + * Ie. nice +19 tasks can never get 'interactive' enough to be + * reinserted into the active array. And only heavily CPU-hog nice -20 + * tasks will be expired. Default nice 0 tasks are somewhere between, + * it takes some effort for them to get interactive, but it's not + * too hard. */ -#if HZ < 200 -#define TICK_SCALE(x) ((x) >> 2) -#elif HZ < 400 -#define TICK_SCALE(x) ((x) >> 1) -#elif HZ < 800 -#define TICK_SCALE(x) (x) -#elif HZ < 1600 -#define TICK_SCALE(x) ((x) << 1) -#else -#define TICK_SCALE(x) ((x) << 2) -#endif -#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1) +#define SCALE(v1,v1_max,v2_max) \ + (v1) * (v2_max) / (v1_max) +#define DELTA(p) \ + (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \ + INTERACTIVE_DELTA) -/* - * Init task must be ok at boot for the ix86 as we will check its signals - * via the SMP irq return path. - */ - -struct task_struct * init_tasks[NR_CPUS] = {&init_task, }; +#define TASK_INTERACTIVE(p) \ + ((p)->prio <= (p)->static_prio - DELTA(p)) -/* - * The tasklist_lock protects the linked list of processes. - * - * The runqueue_lock locks the parts that actually access - * and change the run-queues, and have to be interrupt-safe. - * - * If both locks are to be concurrently held, the runqueue_lock - * nests inside the tasklist_lock. - * - * task->alloc_lock nests inside tasklist_lock. - */ -spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */ -rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ +#define CURRENT_BONUS(p) \ + ((p)->sleep_avg * MAX_BONUS / MAX_SLEEP_AVG) + +#define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \ + (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1))) -static LIST_HEAD(runqueue_head); +#define JUST_INTERACTIVE_SLEEP(p) \ + (MAX_SLEEP_AVG * (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1) + +#define HIGH_CREDIT(p) \ + ((p)->interactive_credit > CREDIT_LIMIT) + +#define LOW_CREDIT(p) \ + ((p)->interactive_credit < -CREDIT_LIMIT) /* - * We align per-CPU scheduling data on cacheline boundaries, - * to prevent cacheline ping-pong. + * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] + * to time slice values. + * + * The higher a process's priority, the bigger timeslices + * it gets during one round of execution. But even the lowest + * priority process gets MIN_TIMESLICE worth of execution time. + * + * task_timeslice() is the interface that is used by the scheduler. + * SCHED_BATCH tasks get longer timeslices to make use of better + * caching. They are inherently noninteractive and they are + * immediately preempted by SCHED_NORMAL tasks so there is no + * downside in using shorter timeslices. 
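+ *
+ * A rough worked example, assuming HZ=100 (one tick = 10 msecs):
+ *
+ *	nice   0, static_prio 120: 20 * (140-120)/40 = 10 ticks (100 msecs)
+ *	nice -20, static_prio 100: 20 * (140-100)/40 = 20 ticks (200 msecs)
+ *	nice +19, static_prio 139: rounds down to 0, clamped to MIN_TIMESLICE
+ *
+ * and a SCHED_BATCH task at nice 0 gets 20x that, i.e. roughly two
+ * seconds per slice.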
*/ -static union { - struct schedule_data { - struct task_struct * curr; - cycles_t last_schedule; - } schedule_data; - char __pad [SMP_CACHE_BYTES]; -} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}}; - -#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr -#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule -struct kernel_stat kstat; -extern struct task_struct *child_reaper; +#define BASE_TIMESLICE(p) \ + (MAX_TIMESLICE * (MAX_PRIO-(p)->static_prio)/MAX_USER_PRIO) -#ifdef CONFIG_SMP +static inline unsigned int task_timeslice(task_t *p) +{ + unsigned int time_slice = BASE_TIMESLICE(p); -#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)]) -#define can_schedule(p,cpu) \ - ((p)->cpus_runnable & (p)->cpus_allowed & (1UL << cpu)) + if (time_slice < MIN_TIMESLICE) + time_slice = MIN_TIMESLICE; + if (p->policy == SCHED_BATCH) + return time_slice * 20; + else + return time_slice; +} -#else +/* + * These are the runqueue data structures: + */ -#define idle_task(cpu) (&init_task) -#define can_schedule(p,cpu) (1) +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) -#endif +typedef struct runqueue runqueue_t; -void scheduling_functions_start_here(void) { } +struct prio_array { + int nr_active; + unsigned long bitmap[BITMAP_SIZE]; + list_t queue[MAX_PRIO]; +}; /* - * This is the function that decides how desirable a process is.. - * You can weigh different processes against each other depending - * on what CPU they've run on lately etc to try to handle cache - * and TLB miss penalties. + * This is the main, per-CPU runqueue data structure. * - * Return values: - * -1000: never select this - * 0: out of time, recalculate counters (but it might still be - * selected) - * +ve: "goodness" value (the larger, the better) - * +1000: realtime process, select this. + * Locking rule: those places that want to lock multiple runqueues + * (such as the load balancing or the process migration code), lock + * acquire operations must be ordered by ascending &runqueue. */ +struct runqueue { + spinlock_t lock; + unsigned long nr_running, nr_switches, expired_timestamp, + nr_uninterruptible; + task_t *curr, *idle; + prio_array_t *active, *expired, arrays[2]; + int best_expired_prio, prev_nr_running[NR_CPUS]; -static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm) -{ - int weight; + task_t *migration_thread; + list_t migration_queue; /* - * select the current process after every other - * runnable process, but before the idle thread. - * Also, dont trigger a counter recalculation. + * The batch queue is a secondary ready-queue: */ - weight = -1; - if (p->policy & SCHED_YIELD) - goto out; + unsigned long nr_batch; + list_t batch_queue; /* - * Non-RT process - normal case first. + * Per-CPU idle CPU time tracking: + * + * - idle_ticks_left counts back from HZ to 0. + * - idle_count is the number of idle ticks in the last second. + * - once it reaches 0, a new idle_avg is calculated. */ - if (p->policy == SCHED_OTHER) { - /* - * Give the process a first-approximation goodness value - * according to the number of clock-ticks it has left. - * - * Don't do any other calculations if the time slice is - * over.. - */ - weight = p->counter; - if (!weight) - goto out; - -#ifdef CONFIG_SMP - /* Give a largish advantage to the same processor... 
*/ - /* (this is equivalent to penalizing other processors) */ - if (p->processor == this_cpu) - weight += PROC_CHANGE_PENALTY; + #define IDLE_TICKS (HZ) + + unsigned int idle_ticks_left, idle_count, idle_avg; + +} ____cacheline_aligned; + +static struct runqueue runqueues[NR_CPUS] __cacheline_aligned; + +#define cpu_rq(cpu) (runqueues + (cpu)) +#define this_rq() cpu_rq(smp_processor_id()) +#define task_rq(p) cpu_rq(task_cpu(p)) +#define cpu_curr(cpu) (cpu_rq(cpu)->curr) +#define rt_task(p) ((p)->prio < MAX_RT_PRIO) + +/* + * Default context-switch locking: + */ +#ifndef prepare_arch_switch +# define prepare_arch_switch(rq, next) do { } while(0) +# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) +# define task_running(rq, p) ((rq)->curr == (p)) #endif - /* .. and a slight advantage to the current MM */ - if (p->mm == this_mm || !p->mm) - weight += 1; - weight += 20 - p->nice; - goto out; +/* + * task_rq_lock - lock the runqueue a given task resides on and disable + * interrupts. Note the ordering: we can safely lookup the task_rq without + * explicitly disabling preemption. + */ +static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +{ + struct runqueue *rq; + +repeat_lock_task: + preempt_disable(); + local_irq_save(*flags); + rq = task_rq(p); + spin_lock(&rq->lock); + if (unlikely(rq != task_rq(p))) { + spin_unlock_irqrestore(&rq->lock, *flags); + preempt_enable(); + goto repeat_lock_task; } + return rq; +} - /* - * Realtime process, select the first one on the - * runqueue (taking priorities within processes - * into account). - */ - weight = 1000 + p->rt_priority; -out: - return weight; +static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) +{ + spin_unlock_irqrestore(&rq->lock, *flags); + preempt_enable(); } /* - * the 'goodness value' of replacing a process on a given CPU. - * positive value means 'replace', zero or negative means 'dont'. + * rq_lock - lock a given runqueue and disable interrupts. */ -static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu) +static inline runqueue_t *this_rq_lock(void) { - return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm); + runqueue_t *rq; + + local_irq_disable(); + rq = this_rq(); + spin_lock(&rq->lock); + + return rq; +} + +static inline void rq_unlock(runqueue_t *rq) +{ + spin_unlock(&rq->lock); + local_irq_enable(); } /* - * This is ugly, but reschedule_idle() is very timing-critical. - * We are called with the runqueue spinlock held and we must - * not claim the tasklist_lock. + * Adding/removing a task to/from a priority array: */ -static FASTCALL(void reschedule_idle(struct task_struct * p)); +static inline void dequeue_task(struct task_struct *p, prio_array_t *array) +{ + array->nr_active--; + list_del(&p->run_list); + if (list_empty(array->queue + p->prio)) + __clear_bit(p->prio, array->bitmap); +} -static void fastcall reschedule_idle(struct task_struct * p) +static inline void enqueue_task(struct task_struct *p, prio_array_t *array) { -#ifdef CONFIG_SMP - int this_cpu = smp_processor_id(); - struct task_struct *tsk, *target_tsk; - int cpu, best_cpu, i, max_prio; - cycles_t oldest_idle; + list_add_tail(&p->run_list, array->queue + p->prio); + __set_bit(p->prio, array->bitmap); + array->nr_active++; + p->array = array; +} - /* - * shortcut if the woken up task's last CPU is - * idle now. 
- */ - best_cpu = p->processor; - if (can_schedule(p, best_cpu)) { - tsk = idle_task(best_cpu); - if (cpu_curr(best_cpu) == tsk) { - int need_resched; -send_now_idle: - /* - * If need_resched == -1 then we can skip sending - * the IPI altogether, tsk->need_resched is - * actively watched by the idle thread. - */ - need_resched = tsk->need_resched; - tsk->need_resched = 1; - if ((best_cpu != this_cpu) && !need_resched) - smp_send_reschedule(best_cpu); - return; - } - } +static inline int effective_prio(task_t *p) +{ + int bonus, prio; + + if (rt_task(p)) + return p->prio; /* - * We know that the preferred CPU has a cache-affine current - * process, lets try to find a new idle CPU for the woken-up - * process. Select the least recently active idle CPU. (that - * one will have the least active cache context.) Also find - * the executing process which has the least priority. - */ - oldest_idle = (cycles_t) -1; - target_tsk = NULL; - max_prio = 0; - - for (i = 0; i < smp_num_cpus; i++) { - cpu = cpu_logical_map(i); - if (!can_schedule(p, cpu)) - continue; - tsk = cpu_curr(cpu); + * Here we scale the actual sleep average [0 .... MAX_SLEEP_AVG] + * into the -5 ... 0 ... +5 bonus/penalty range. + * + * We use 25% of the full 0...39 priority range so that: + * + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * + * Both properties are important to certain workloads. + */ + bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; + + prio = p->static_prio - bonus; + if (prio < MAX_RT_PRIO) + prio = MAX_RT_PRIO; + if (prio > MAX_PRIO-1) + prio = MAX_PRIO-1; + return prio; +} + +/* + * __activate_task - move a task to the runqueue. + */ +static inline void __activate_task(task_t *p, runqueue_t *rq) +{ + enqueue_task(p, rq->active); + rq->nr_running++; +} + +static void recalc_task_prio(task_t *p, unsigned long now) +{ + unsigned long sleep_time = now - p->timestamp; + + if (sleep_time > MAX_SLEEP_AVG) + sleep_time = MAX_SLEEP_AVG; + + if (likely(sleep_time > 0)) { /* - * We use the first available idle CPU. This creates - * a priority list between idle CPUs, but this is not - * a problem. + * User tasks that sleep a long time are categorised as + * idle and will get just interactive status to stay active & + * prevent them suddenly becoming cpu hogs and starving + * other processes. */ - if (tsk == idle_task(cpu)) { -#if defined(__i386__) && defined(CONFIG_SMP) - /* - * Check if two siblings are idle in the same - * physical package. Use them if found. + if (p->mm && p->activated != -1 && + sleep_time > JUST_INTERACTIVE_SLEEP(p)){ + p->sleep_avg = MAX_SLEEP_AVG - + AVG_TIMESLICE; + if (!HIGH_CREDIT(p)) + p->interactive_credit++; + } else { + /* + * The lower the sleep avg a task has the more + * rapidly it will rise with sleep time. + */ + sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1; + + /* + * Tasks with low interactive_credit are limited to + * one timeslice worth of sleep avg bonus. 
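+ * ("Low credit" means interactive_credit has dropped below
+ * -CREDIT_LIMIT; capping the bonus at one timeslice keeps a CPU
+ * hog that sleeps only occasionally from buying itself
+ * interactive status with a single long nap.)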
+ */ + if (LOW_CREDIT(p) && + sleep_time > task_timeslice(p)) + sleep_time = task_timeslice(p); + + /* + * Non high_credit tasks waking from uninterruptible + * sleep are limited in their sleep_avg rise as they + * are likely to be cpu hogs waiting on I/O */ - if (smp_num_siblings == 2) { - if (cpu_curr(cpu_sibling_map[cpu]) == - idle_task(cpu_sibling_map[cpu])) { - oldest_idle = last_schedule(cpu); - target_tsk = tsk; - break; - } - - } -#endif - if (last_schedule(cpu) < oldest_idle) { - oldest_idle = last_schedule(cpu); - target_tsk = tsk; + if (p->activated == -1 && !HIGH_CREDIT(p) && p->mm){ + if (p->sleep_avg >= JUST_INTERACTIVE_SLEEP(p)) + sleep_time = 0; + else if (p->sleep_avg + sleep_time >= + JUST_INTERACTIVE_SLEEP(p)){ + p->sleep_avg = + JUST_INTERACTIVE_SLEEP(p); + sleep_time = 0; + } } - } else { - if (oldest_idle == (cycles_t)-1) { - int prio = preemption_goodness(tsk, p, cpu); - if (prio > max_prio) { - max_prio = prio; - target_tsk = tsk; - } + /* + * This code gives a bonus to interactive tasks. + * + * The boost works by updating the 'average sleep time' + * value here, based on ->timestamp. The more time a task + * spends sleeping, the higher the average gets - and the + * higher the priority boost gets as well. + */ + p->sleep_avg += sleep_time; + + if (p->sleep_avg > MAX_SLEEP_AVG){ + p->sleep_avg = MAX_SLEEP_AVG; + if (!HIGH_CREDIT(p)) + p->interactive_credit++; } } } - tsk = target_tsk; - if (tsk) { - if (oldest_idle != (cycles_t)-1) { - best_cpu = tsk->processor; - goto send_now_idle; + + p->prio = effective_prio(p); +} + +static inline void activate_task(task_t *p, runqueue_t *rq) +{ + recalc_task_prio(p, jiffies); + + /* + * This checks to make sure it's not an uninterruptible task + * that is now waking up. + */ + if (!p->activated){ + /* + * Tasks which were woken up by interrupts (ie. hw events) + * are most likely of interactive nature. So we give them + * the credit of extending their sleep time to the period + * of time they spend on the runqueue, waiting for execution + * on a CPU, first time around: + */ + if (in_interrupt()) + p->activated = 2; + else + /* + * Normal first-time wakeups get a credit too for on-runqueue + * time, but it will be weighted down: + */ + p->activated = 1; } - tsk->need_resched = 1; - if (tsk->processor != this_cpu) - smp_send_reschedule(tsk->processor); - } - return; - + p->timestamp = jiffies; + + __activate_task(p, rq); +} -#else /* UP */ - int this_cpu = smp_processor_id(); +static inline void activate_batch_task(task_t *p, runqueue_t *rq) +{ + rq->nr_batch--; + list_del(&p->run_list); + activate_task(p, rq); + p->flags &= ~PF_BATCH; +} + +static inline void deactivate_task(struct task_struct *p, runqueue_t *rq) +{ + rq->nr_running--; + if (p->state == TASK_UNINTERRUPTIBLE) + rq->nr_uninterruptible++; + dequeue_task(p, p->array); + p->array = NULL; +} + +static inline void deactivate_batch_task(task_t *p, runqueue_t *rq) +{ + prio_array_t *array = p->array; + + deactivate_task(p, rq); + rq->nr_batch++; + if (array == rq->expired) + list_add_tail(&p->run_list, &rq->batch_queue); + else + list_add(&p->run_list, &rq->batch_queue); + /* + * Via this bit we can tell whether a task is in the batchqueue, + * this information is not available in any other cheap way. 
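+ * (kick_if_running() below relies on this bit: when a signal is
+ * delivered to a SCHED_BATCH task that is parked here, it calls
+ * activate_batch_task() to put the task back on the runqueue so
+ * the signal can be handled.)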
+ */ + p->flags |= PF_BATCH; +} + +static inline void resched_task(task_t *p) +{ +#if CONFIG_SMP + int need_resched; struct task_struct *tsk; + int i,cpu; - tsk = cpu_curr(this_cpu); - if (preemption_goodness(tsk, p, this_cpu) > 0) - tsk->need_resched = 1; + preempt_disable(); + need_resched = p->need_resched; + wmb(); + set_tsk_need_resched(p); + if (!need_resched && (p->cpu != smp_processor_id())) + smp_send_reschedule(p->cpu); + preempt_enable(); +#if LOWLATENCY_NEEDED + if (enable_lowlatency && (p->policy != SCHED_OTHER)) { + struct task_struct *t; + for (i = 0; i < smp_num_cpus; i++) { + cpu = cpu_logical_map(i); + t = cpu_curr(cpu); + if (t != tsk) + t->need_resched = 1; + } + } +#endif +#else + set_tsk_need_resched(p); #endif } +#ifdef CONFIG_SMP + /* - * Careful! - * - * This has to add the process to the _end_ of the - * run-queue, not the beginning. The goodness value will - * determine whether this process will run next. This is - * important to get SCHED_FIFO and SCHED_RR right, where - * a process that is either pre-empted or its time slice - * has expired, should be moved to the tail of the run - * queue for its priority - Bhavesh Davda + * Wait for a process to unschedule. This is used by the exit() and + * ptrace() code. */ -static inline void add_to_runqueue(struct task_struct * p) +void wait_task_inactive(task_t * p) { - list_add_tail(&p->run_list, &runqueue_head); - nr_running++; + unsigned long flags; + runqueue_t *rq; + +repeat: + preempt_disable(); + rq = task_rq(p); + if (unlikely(task_running(rq, p))) { + cpu_relax(); + /* + * enable/disable preemption just to make this + * a preemption point - we are busy-waiting + * anyway. + */ + preempt_enable(); + goto repeat; + } + rq = task_rq_lock(p, &flags); + if (unlikely(task_running(rq, p))) { + task_rq_unlock(rq, &flags); + preempt_enable(); + goto repeat; + } + task_rq_unlock(rq, &flags); + preempt_enable(); } +#endif -static inline void move_last_runqueue(struct task_struct * p) +/* + * Kick the remote CPU if the task is running currently, + * this code is used by the signal code to signal tasks + * which are in user-mode as quickly as possible. + * + * (Note that we do this lockless - if the task does anything + * while the message is in flight then it will notice the + * sigpending condition anyway.) + * + * this code also activates batch processes if they get a signal. + */ +void kick_if_running(task_t * p) { - list_del(&p->run_list); - list_add_tail(&p->run_list, &runqueue_head); + if (task_running(task_rq(p), p) && (p->cpu != smp_processor_id())) + resched_task(p); + /* + * If batch processes get signals but are not running currently + * then give them a chance to handle the signal. (the kernel + * side signal handling code will run for sure, the userspace + * part depends on system load and might be delayed indefinitely.) + */ + if (p->policy == SCHED_BATCH) { + unsigned long flags; + runqueue_t *rq; + + rq = task_rq_lock(p, &flags); + if (p->flags & PF_BATCH) + activate_batch_task(p, rq); + task_rq_unlock(rq, &flags); + } } /* @@ -345,416 +562,902 @@ * progress), and as such you're allowed to do the simpler * "current->state = TASK_RUNNING" to mark yourself runnable * without the overhead of this. + * + * returns failure only if the task is already active. 
*/ -static inline int try_to_wake_up(struct task_struct * p, int synchronous) +static int try_to_wake_up(task_t * p, int sync) { unsigned long flags; int success = 0; + long old_state; + runqueue_t *rq; - /* - * We want the common case fall through straight, thus the goto. - */ - spin_lock_irqsave(&runqueue_lock, flags); +repeat_lock_task: + rq = task_rq_lock(p, &flags); + old_state = p->state; + if (!p->array) { + /* + * Fast-migrate the task if it's not running or runnable + * currently. Do not violate hard affinity. + */ + if (unlikely(sync && !task_running(rq, p) && + (task_cpu(p) != smp_processor_id()) && + (p->cpus_allowed & (1UL << smp_processor_id())))) { + + set_task_cpu(p, smp_processor_id()); + + task_rq_unlock(rq, &flags); + goto repeat_lock_task; + } + if (old_state == TASK_UNINTERRUPTIBLE){ + /* + * Limit tasks waking from UNINTERRUPTIBLE SLEEP to + * just interactive state to prevent cpu hogs getting + * interactive state during disk i/o + */ + if (p->mm) + p->activated = -1; + rq->nr_uninterruptible--; + } + activate_task(p, rq); + + if (p->prio < rq->curr->prio || rq->curr->policy == SCHED_BATCH) + resched_task(rq->curr); + success = 1; + } p->state = TASK_RUNNING; - if (task_on_runqueue(p)) - goto out; - add_to_runqueue(p); - if (!synchronous || !(p->cpus_allowed & (1UL << smp_processor_id()))) - reschedule_idle(p); - success = 1; -out: - spin_unlock_irqrestore(&runqueue_lock, flags); + task_rq_unlock(rq, &flags); + return success; } -inline int fastcall wake_up_process(struct task_struct * p) +int fastcall wake_up_process(task_t * p) { return try_to_wake_up(p, 0); } -static void process_timeout(unsigned long __data) +void fastcall wake_up_forked_process(task_t * p) { - struct task_struct * p = (struct task_struct *) __data; + runqueue_t *rq; + preempt_disable(); + rq = this_rq_lock(); + + p->state = TASK_RUNNING; + /* + * We decrease the sleep average of forking parents + * and children as well, to keep max-interactive tasks + * from forking tasks that are max-interactive. + */ + current->sleep_avg = CURRENT_BONUS(current) * + PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS; + p->sleep_avg = CURRENT_BONUS(p) * + CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS; + + p->interactive_credit = 0; - wake_up_process(p); + p->prio = effective_prio(p); + + set_task_cpu(p, smp_processor_id()); + activate_task(p, rq); + + rq_unlock(rq); + preempt_enable(); } -/** - * schedule_timeout - sleep until timeout - * @timeout: timeout value in jiffies - * - * Make the current task sleep until @timeout jiffies have - * elapsed. The routine will return immediately unless - * the current task state has been set (see set_current_state()). - * - * You can set the task state as follows - - * - * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to - * pass before the routine returns. The routine will return 0 - * - * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. In this case the remaining time - * in jiffies will be returned, or 0 if the timer expired in time - * - * The current task state is guaranteed to be TASK_RUNNING when this - * routine returns. - * - * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule - * the CPU away without a bound on the timeout. In this case the return - * value will be %MAX_SCHEDULE_TIMEOUT. +/* + * Potentially available exiting-child timeslices are + * retrieved here - this way the parent does not get + * penalized for creating too many processes. 
* - * In all cases the return value is guaranteed to be non-negative. + * (this cannot be used to 'generate' timeslices + * artificially, because any timeslice recovered here + * was given away by the parent in the first place.) */ -signed long fastcall schedule_timeout(signed long timeout) +void fastcall sched_exit(task_t * p) { - struct timer_list timer; - unsigned long expire; + __cli(); + if (p->first_time_slice) { + current->time_slice += p->time_slice; + if (unlikely(current->time_slice > MAX_TIMESLICE)) + current->time_slice = MAX_TIMESLICE; + } + __sti(); + /* + * If the child was a (relative-) CPU hog then decrease + * the sleep_avg of the parent as well. + */ + if (p->sleep_avg < current->sleep_avg) + current->sleep_avg = (current->sleep_avg * EXIT_WEIGHT + + p->sleep_avg) / (EXIT_WEIGHT + 1); +} - switch (timeout) - { - case MAX_SCHEDULE_TIMEOUT: - /* - * These two special cases are useful to be comfortable - * in the caller. Nothing more. We could take - * MAX_SCHEDULE_TIMEOUT from one of the negative value - * but I' d like to return a valid offset (>=0) to allow - * the caller to do everything it want with the retval. - */ - schedule(); - goto out; - default: - /* - * Another bit of PARANOID. Note that the retval will be - * 0 since no piece of kernel is supposed to do a check - * for a negative retval of schedule_timeout() (since it - * should never happens anyway). You just have the printk() - * that will tell you if something is gone wrong and where. - */ - if (timeout < 0) - { - printk(KERN_ERR "schedule_timeout: wrong timeout " - "value %lx from %p\n", timeout, - __builtin_return_address(0)); - current->state = TASK_RUNNING; - goto out; - } +#if CONFIG_SMP || CONFIG_PREEMPT +asmlinkage void schedule_tail(task_t *prev) +{ + finish_arch_switch(this_rq(), prev); +} +#endif + +static inline task_t * context_switch(task_t *prev, task_t *next) +{ + struct mm_struct *mm = next->mm; + struct mm_struct *oldmm = prev->active_mm; + + if (unlikely(!mm)) { + next->active_mm = oldmm; + atomic_inc(&oldmm->mm_count); + enter_lazy_tlb(oldmm, next, smp_processor_id()); + } else + switch_mm(oldmm, mm, next, smp_processor_id()); + + if (unlikely(!prev->mm)) { + prev->active_mm = NULL; + mmdrop(oldmm); } - expire = timeout + jiffies; + /* Here we just switch the register state and the stack. */ + switch_to(prev, next, prev); - init_timer(&timer); - timer.expires = expire; - timer.data = (unsigned long) current; - timer.function = process_timeout; + return prev; +} - add_timer(&timer); - schedule(); - del_timer_sync(&timer); +unsigned long nr_running(void) +{ + unsigned long i, sum = 0; + + for (i = 0; i < NR_CPUS; i++) + sum += cpu_rq(i)->nr_running; + + return sum; +} + +unsigned long nr_uninterruptible(void) +{ + unsigned long i, sum = 0; - timeout = expire - jiffies; + for (i = 0; i < NR_CPUS; i++) + sum += cpu_rq(i)->nr_uninterruptible; - out: - return timeout < 0 ? 0 : timeout; + return sum; +} + +unsigned long nr_context_switches(void) +{ + unsigned long i, sum = 0; + + for (i = 0; i < NR_CPUS; i++) + sum += cpu_rq(i)->nr_switches; + + return sum; } /* - * schedule_tail() is getting called from the fork return path. This - * cleans up all remaining scheduler things, without impacting the - * common case. + * double_rq_lock - safely lock two runqueues + * + * Note this does not disable interrupts like task_rq_lock, + * you need to do so manually before calling. 
*/ -static inline void __schedule_tail(struct task_struct *prev) +static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) { -#ifdef CONFIG_SMP - int policy; + if (rq1 == rq2) + spin_lock(&rq1->lock); + else { + if (rq1 < rq2) { + spin_lock(&rq1->lock); + spin_lock(&rq2->lock); + } else { + spin_lock(&rq2->lock); + spin_lock(&rq1->lock); + } + } +} - /* - * prev->policy can be written from here only before `prev' - * can be scheduled (before setting prev->cpus_runnable to ~0UL). - * Of course it must also be read before allowing prev - * to be rescheduled, but since the write depends on the read - * to complete, wmb() is enough. (the spin_lock() acquired - * before setting cpus_runnable is not enough because the spin_lock() - * common code semantics allows code outside the critical section - * to enter inside the critical section) +/* + * double_rq_unlock - safely unlock two runqueues + * + * Note this does not restore interrupts like task_rq_unlock, + * you need to do so manually after calling. + */ +static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) +{ + spin_unlock(&rq1->lock); + if (rq1 != rq2) + spin_unlock(&rq2->lock); +} + +#if CONFIG_SMP + +/* + * Batch balancing is much simpler since it's optimized for + * CPU-intensive workloads. The balancer keeps the batch-queue + * length as close to the average length as possible. It weighs + * runqueue distribution based on the idle percentage of each + * CPU - this way statistical fairness of timeslice distribution + * is preserved, in the long run it does not matter whether a + * batch task is queued to a busy CPU or not, it will get an + * equal share of all available idle CPU time. + * + * CPU-intensive SCHED_BATCH processes have a much lower + * fork()/exit() flux, so the balancing does not have to + * be prepared for high statistical fluctuations in queue + * length. + */ +static inline void load_balance_batch(runqueue_t *this_rq, int this_cpu) +{ + int i, nr_batch, nr_idle, goal, rq_goal; + runqueue_t *rq_src; + + /* + * First the unlocked fastpath - is there any work to do? + * fastpath #1: no batch processes in the system, + * fastpath #2: no idle time available in the system. + * fastpath #3: no balancing needed for the current queue. */ - policy = prev->policy; - prev->policy = policy & ~SCHED_YIELD; - wmb(); + nr_batch = 0; + nr_idle = 0; - /* - * fast path falls through. We have to clear cpus_runnable before - * checking prev->state to avoid a wakeup race. Protect against - * the task exiting early. - */ - task_lock(prev); - task_release_cpu(prev); - mb(); - if (prev->state == TASK_RUNNING) - goto needs_resched; + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i)) + continue; -out_unlock: - task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */ - return; + nr_batch += cpu_rq(i)->nr_batch; + nr_idle += cpu_rq(i)->idle_avg; + } + if (!nr_batch || !nr_idle) + return; + + goal = this_rq->idle_avg * nr_batch / nr_idle; + if (this_rq->nr_batch >= goal) + return; /* - * Slow path - we 'push' the previous process and - * reschedule_idle() will attempt to find a new - * processor for it. (but it might preempt the - * current process as well.) We must take the runqueue - * lock and re-check prev->state to be correct. It might - * still happen that this process has a preemption - * 'in progress' already - but this is not a problem and - * might happen in other circumstances as well. + * The slow path - the local batch-queue is too short and + * needs balancing. 
We unlock the runqueue (but keep + * interrupts disabled) to simplify locking. (It does not + * matter if the runqueues change meanwhile - this is all + * statistical balancing so only the long run effects matter.) */ -needs_resched: - { - unsigned long flags; + spin_unlock(&this_rq->lock); - /* - * Avoid taking the runqueue lock in cases where - * no preemption-check is necessery: - */ - if ((prev == idle_task(smp_processor_id())) || - (policy & SCHED_YIELD)) - goto out_unlock; + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i) || (i == this_cpu)) + continue; - spin_lock_irqsave(&runqueue_lock, flags); - if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev)) - reschedule_idle(prev); - spin_unlock_irqrestore(&runqueue_lock, flags); - goto out_unlock; + rq_src = cpu_rq(i); + double_rq_lock(this_rq, rq_src); + + rq_goal = rq_src->idle_avg * nr_batch / nr_idle; + + if (rq_src->nr_batch > rq_goal) { + /* + * Migrate a single batch-process. + */ + list_t *tmp = rq_src->batch_queue.prev; + + list_del(tmp); + list_add_tail(tmp, &this_rq->batch_queue); + rq_src->nr_batch--; + this_rq->nr_batch++; + set_task_cpu(list_entry(tmp, task_t, run_list), this_cpu); + } + + double_rq_unlock(this_rq, rq_src); + if (this_rq->nr_batch >= goal) + break; } -#else - prev->policy &= ~SCHED_YIELD; -#endif /* CONFIG_SMP */ + spin_lock(&this_rq->lock); +} +/* + * Lock the busiest runqueue as well, this_rq is locked already. + * Recalculate nr_running if we have to drop the runqueue lock. + */ +static inline unsigned int double_lock_balance(runqueue_t *this_rq, + runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running) +{ + if (unlikely(!spin_trylock(&busiest->lock))) { + if (busiest < this_rq) { + spin_unlock(&this_rq->lock); + spin_lock(&busiest->lock); + spin_lock(&this_rq->lock); + /* Need to recalculate nr_running */ + if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu])) + nr_running = this_rq->nr_running; + else + nr_running = this_rq->prev_nr_running[this_cpu]; + } else + spin_lock(&busiest->lock); + } + return nr_running; +} + +static inline runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu, int idle, int *imbalance) +{ + int nr_running, load, max_load, i; + runqueue_t *busiest, *rq_src; + + /* + * We search all runqueues to find the most busy one. + * We do this lockless to reduce cache-bouncing overhead, + * we re-check the 'best' source CPU later on again, with + * the lock held. + * + * We fend off statistical fluctuations in runqueue lengths by + * saving the runqueue length during the previous load-balancing + * operation and using the smaller one the current and saved lengths. + * If a runqueue is long enough for a longer amount of time then + * we recognize it and pull tasks from it. + * + * The 'current runqueue length' is a statistical maximum variable, + * for that one we take the longer one - to avoid fluctuations in + * the other direction. So for a load-balance to happen it needs + * stable long runqueue on the target CPU and stable short runqueue + * on the local runqueue. + * + * We make an exception if this CPU is about to become idle - in + * that case we are less picky about moving a task across CPUs and + * take what can be taken. 
+ */ + if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu])) + nr_running = this_rq->nr_running; + else + nr_running = this_rq->prev_nr_running[this_cpu]; + + busiest = NULL; + max_load = 1; + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i)) + continue; + + rq_src = cpu_rq(i); + if (idle || (rq_src->nr_running < this_rq->prev_nr_running[i])) + load = rq_src->nr_running; + else + load = this_rq->prev_nr_running[i]; + this_rq->prev_nr_running[i] = rq_src->nr_running; + + if ((load > max_load) && (rq_src != this_rq)) { + busiest = rq_src; + max_load = load; + } + } + + if (likely(!busiest)) + goto out; + + *imbalance = (max_load - nr_running) / 2; + + /* It needs an at least ~25% imbalance to trigger balancing. */ + if (!idle && (*imbalance < (max_load + 3)/4)) { + busiest = NULL; + goto out; + } + + nr_running = double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running); + /* + * Make sure nothing changed since we checked the + * runqueue length. + */ + if (busiest->nr_running <= nr_running + 1) { + spin_unlock(&busiest->lock); + busiest = NULL; + } +out: + return busiest; } -asmlinkage void schedule_tail(struct task_struct *prev) +/* + * Move a task from a remote runqueue to the local runqueue. + * Both runqueues must be locked. + */ +static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu) { - __schedule_tail(prev); + dequeue_task(p, src_array); + src_rq->nr_running--; + set_task_cpu(p, this_cpu); + this_rq->nr_running++; + enqueue_task(p, this_rq->active); + /* + * Note that idle threads have a prio of MAX_PRIO, for this test + * to be always true for them. + */ + if (p->prio < this_rq->curr->prio) + set_need_resched(); } /* - * 'schedule()' is the scheduler function. It's a very simple and nice - * scheduler: it's not perfect, but certainly works for most things. - * - * The goto is "interesting". + * Current runqueue is empty, or rebalance tick: if there is an + * inbalance (current runqueue is too short) then pull from + * busiest runqueue(s). * - * NOTE!! Task 0 is the 'idle' task, which gets called when no other - * tasks can run. It can not be killed, and it cannot sleep. The 'state' - * information in task[0] is never used. + * We call this with the current runqueue locked, + * irqs disabled. */ -asmlinkage void schedule(void) +static void load_balance(runqueue_t *this_rq, int idle) { - struct schedule_data * sched_data; - struct task_struct *prev, *next, *p; - struct list_head *tmp; - int this_cpu, c; + int imbalance, idx, this_cpu = smp_processor_id(); + runqueue_t *busiest; + prio_array_t *array; + list_t *head, *curr; + task_t *tmp; + busiest = find_busiest_queue(this_rq, this_cpu, idle, &imbalance); + if (!busiest) + goto balance_batch; - spin_lock_prefetch(&runqueue_lock); + /* + * We first consider expired tasks. Those will likely not be + * executed in the near future, and they are most likely to + * be cache-cold, thus switching CPUs has the least effect + * on them. 
+ */ + if (busiest->expired->nr_active) + array = busiest->expired; + else + array = busiest->active; - BUG_ON(!current->active_mm); -need_resched_back: - prev = current; - this_cpu = prev->processor; +new_array: + /* Start searching at priority 0: */ + idx = 0; +skip_bitmap: + if (!idx) + idx = sched_find_first_bit(array->bitmap); + else + idx = find_next_bit(array->bitmap, MAX_PRIO, idx); + if (idx == MAX_PRIO) { + if (array == busiest->expired) { + array = busiest->active; + goto new_array; + } + goto out_unlock; + } - if (unlikely(in_interrupt())) { - printk("Scheduling in interrupt\n"); - BUG(); + head = array->queue + idx; + curr = head->prev; +skip_queue: + tmp = list_entry(curr, task_t, run_list); + + /* + * We do not migrate tasks that are: + * 1) running (obviously), or + * 2) cannot be migrated to this CPU due to cpus_allowed, or + * 3) are cache-hot on their current CPU. + */ + +#define CAN_MIGRATE_TASK(p,rq,this_cpu) \ + ((jiffies - (p)->timestamp > cache_decay_ticks) && \ + !task_running(rq, p) && \ + ((p)->cpus_allowed & (1UL << (this_cpu)))) + + curr = curr->prev; + + if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) { + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; + } + pull_task(busiest, array, tmp, this_rq, this_cpu); + if (!idle && --imbalance) { + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; } +out_unlock: + spin_unlock(&busiest->lock); +balance_batch: + load_balance_batch(this_rq, this_cpu); +} + +/* + * One of the idle_cpu_tick() or the busy_cpu_tick() function will + * gets called every timer tick, on every CPU. Our balancing action + * frequency and balancing agressivity depends on whether the CPU is + * idle or not. + * + * busy-rebalance every 250 msecs. idle-rebalance every 1 msec. (or on + * systems with HZ=100, every 10 msecs.) + */ +#define BUSY_REBALANCE_TICK (HZ/4 ?: 1) +#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1) + +static inline void idle_tick(runqueue_t *rq) +{ + if (jiffies % IDLE_REBALANCE_TICK) + return; + spin_lock(&rq->lock); + load_balance(rq, 1); + spin_unlock(&rq->lock); +} - release_kernel_lock(prev, this_cpu); +#endif - /* - * 'sched_data' is protected by the fact that we can run - * only one process per CPU. - */ - sched_data = & aligned_data[this_cpu].schedule_data; +/* + * We place interactive tasks back into the active array, if possible. + * + * To guarantee that this does not starve expired tasks we ignore the + * interactivity of a task if the first expired task had to wait more + * than a 'reasonable' amount of time. This deadline timeout is + * load-dependent, as the frequency of array switched decreases with + * increasing number of running tasks. We also ignore the interactivity + * if a better static_prio task has expired: + */ +#define EXPIRED_STARVING(rq) \ + (( STARVATION_LIMIT && ((rq)->expired_timestamp && \ + (jiffies - (rq)->expired_timestamp >= \ + STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ + ((rq)->curr->static_prio > (rq)->best_expired_prio)) - spin_lock_irq(&runqueue_lock); +/* + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. 
+ */ +void scheduler_tick(int user_ticks, int sys_ticks) +{ + int cpu = smp_processor_id(); + runqueue_t *rq = this_rq(); + task_t *p = current; + +#if CONFIG_SMP + if (user_ticks || sys_ticks) { + /* + * This code is rare, triggered only once per second: + */ + if (--rq->idle_ticks_left <= 0) { + /* + * Maintain a simple running average: + */ + rq->idle_avg += rq->idle_count; + rq->idle_avg >>= 1; + + rq->idle_ticks_left = IDLE_TICKS; + rq->idle_count = 0; - /* move an exhausted RR process to be last.. */ - if (unlikely(prev->policy == SCHED_RR)) - if (!prev->counter) { - prev->counter = NICE_TO_TICKS(prev->nice); - move_last_runqueue(prev); } + } + if (p == rq->idle || p->policy == SCHED_BATCH) + rq->idle_count++; +#endif + if (p == rq->idle) { + if (local_bh_count(cpu) || local_irq_count(cpu) > 1) + kstat.per_cpu_system[cpu] += sys_ticks; +#if CONFIG_SMP + idle_tick(rq); +#endif + return; + } + if (TASK_NICE(p) > 0 || p->policy == SCHED_BATCH) + kstat.per_cpu_nice[cpu] += user_ticks; + else + kstat.per_cpu_user[cpu] += user_ticks; + kstat.per_cpu_system[cpu] += sys_ticks; - switch (prev->state) { - case TASK_INTERRUPTIBLE: - if (signal_pending(prev)) { - prev->state = TASK_RUNNING; - break; - } - default: - del_from_runqueue(prev); - case TASK_RUNNING:; + /* Task might have expired already, but not scheduled off yet */ + if (p->array != rq->active) { + set_tsk_need_resched(p); + return; + } + spin_lock(&rq->lock); + if (unlikely(rt_task(p))) { + /* + * RR tasks need a special form of timeslice management. + * FIFO tasks have no timeslices. + */ + if ((p->policy == SCHED_RR) && !--p->time_slice) { + p->time_slice = task_timeslice(p); + p->first_time_slice = 0; + set_tsk_need_resched(p); + + /* put it at the end of the queue: */ + dequeue_task(p, rq->active); + enqueue_task(p, rq->active); + } + goto out; + } + /* + * The task was running during this tick - update the + * time slice counter and the sleep average. Note: we + * do not update a process's priority until it either + * goes to sleep or uses up its timeslice. This makes + * it possible for interactive tasks to use up their + * timeslices at their highest priority levels. + */ + if (!--p->time_slice) { + dequeue_task(p, rq->active); + set_tsk_need_resched(p); + p->prio = effective_prio(p); + p->time_slice = task_timeslice(p); + p->first_time_slice = 0; + + if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { + if (!rq->expired_timestamp) + rq->expired_timestamp = jiffies; + enqueue_task(p, rq->expired); + if (p->static_prio < rq->best_expired_prio) + rq->best_expired_prio = p->static_prio; + } else + enqueue_task(p, rq->active); + } else { + /* + * Prevent a too long timeslice allowing a task to monopolize + * the CPU. We do this by splitting up the timeslice into + * smaller pieces. + * + * Note: this does not mean the task's timeslices expire or + * get lost in any way, they just might be preempted by + * another task of equal priority. (one with higher + * priority would have preempted this task already.) We + * requeue this task to the end of the list on this priority + * level, which is in essence a round-robin of tasks with + * equal priority. 
+ */ + if (p->mm && TASK_INTERACTIVE(p) && !((task_timeslice(p) - + p->time_slice) % TIMESLICE_GRANULARITY(p)) && + (p->time_slice > MIN_TIMESLICE) && + (p->array == rq->active)) { + dequeue_task(p, rq->active); + set_tsk_need_resched(p); + p->prio = effective_prio(p); + enqueue_task(p, rq->active); + } + + } +out: +#if CONFIG_SMP + if (!(jiffies % BUSY_REBALANCE_TICK)) + load_balance(rq, 0); +#endif + spin_unlock(&rq->lock); +} + +void scheduling_functions_start_here(void) { } + +/* + * This function is called by the lowlevel kernel entry code if + * pure userspace code is preempted. Such processes, if SCHED_BATCH, + * are candidates for batch scheduling. Every other process (including + * kernel-mode SCHED_BATCH processes) is scheduled in a non-batch way. + */ +asmlinkage void schedule_userspace(void) +{ + runqueue_t *rq; + + if (current->policy != SCHED_BATCH) { + schedule(); + return; } - prev->need_resched = 0; /* - * this is the scheduler proper: + * Only handle batch tasks that are runnable. */ + if (current->state == TASK_RUNNING) { + rq = this_rq_lock(); + deactivate_batch_task(current, rq); + + // we can keep irqs disabled: + spin_unlock(&rq->lock); + } + + schedule(); +} + +/* + * 'schedule()' is the main scheduler function. + */ +asmlinkage void schedule(void) +{ + task_t *prev, *next; + runqueue_t *rq; + prio_array_t *array; + list_t *queue; + unsigned long run_time; + int idx; + if (unlikely(in_interrupt())) + BUG(); -repeat_schedule: - /* - * Default process to select.. - */ - next = idle_task(this_cpu); - c = -1000; - list_for_each(tmp, &runqueue_head) { - p = list_entry(tmp, struct task_struct, run_list); - if (can_schedule(p, this_cpu)) { - int weight = goodness(p, this_cpu, prev->active_mm); - if (weight > c) - c = weight, next = p; - } - } +need_resched: + preempt_disable(); + prev = current; + rq = this_rq(); - /* Do we need to re-calculate counters? */ - if (unlikely(!c)) { - struct task_struct *p; - - spin_unlock_irq(&runqueue_lock); - read_lock(&tasklist_lock); - for_each_task(p) - p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); - read_unlock(&tasklist_lock); - spin_lock_irq(&runqueue_lock); - goto repeat_schedule; - } + release_kernel_lock(prev, smp_processor_id()); + if (likely(jiffies - prev->timestamp < MAX_SLEEP_AVG)) + run_time = jiffies - prev->timestamp; + else + run_time = MAX_SLEEP_AVG; /* - * from this point on nothing can prevent us from - * switching to the next task, save this fact in - * sched_data. + * Tasks with interactive credits get charged less run_time + * at high sleep_avg to delay them losing their interactive + * status */ - sched_data->curr = next; - task_set_cpu(next, this_cpu); - spin_unlock_irq(&runqueue_lock); - - if (unlikely(prev == next)) { - /* We won't go through the normal tail, so do this by hand */ - prev->policy &= ~SCHED_YIELD; - goto same_process; - } - -#ifdef CONFIG_SMP - /* - * maintain the per-process 'last schedule' value. - * (this has to be recalculated even if we reschedule to - * the same process) Currently this is only used on SMP, - * and it's approximate, so we do not have to maintain - * it while holding the runqueue spinlock. - */ - sched_data->last_schedule = get_cycles(); + if (HIGH_CREDIT(prev)) + run_time /= (CURRENT_BONUS(prev) ? : 1); /* - * We drop the scheduler lock early (it's a global spinlock), - * thus we have to lock the previous process from getting - * rescheduled during switch_to(). + * Ensure everything gets charged at least one tick. 
*/ + if (!run_time) + run_time = 1; -#endif /* CONFIG_SMP */ + spin_lock_irq(&rq->lock); - kstat.context_swtch++; +#ifdef CONFIG_PREEMPT /* - * there are 3 processes which are affected by a context switch: - * - * prev == .... ==> (last => next) - * - * It's the 'much more previous' 'prev' that is on next's stack, - * but prev is set to (the just run) 'last' process by switch_to(). - * This might sound slightly confusing but makes tons of sense. + * if entering from preempt_schedule, off a kernel preemption, + * go straight to picking the next task. */ - prepare_to_switch(); - { - struct mm_struct *mm = next->mm; - struct mm_struct *oldmm = prev->active_mm; - if (!mm) { - BUG_ON(next->active_mm); - next->active_mm = oldmm; - atomic_inc(&oldmm->mm_count); - enter_lazy_tlb(oldmm, next, this_cpu); - } else { - BUG_ON(next->active_mm != mm); - switch_mm(oldmm, mm, next, this_cpu); + if (unlikely(preempt_get_count() & PREEMPT_ACTIVE)) + goto pick_next_task; +#endif + switch (prev->state) { + case TASK_INTERRUPTIBLE: + if (unlikely(signal_pending(prev))) { + prev->state = TASK_RUNNING; + break; } + default: + deactivate_task(prev, rq); + case TASK_RUNNING: + ; + } +pick_next_task: + if (unlikely(!rq->nr_running)) { +#if CONFIG_SMP + load_balance(rq, 1); + if (rq->nr_running) + goto pick_next_task; +#endif + /* + * Pick a task from the batch queue if available. + */ + if (rq->nr_batch) { + list_t *tmp = rq->batch_queue.next; - if (!prev->mm) { - prev->active_mm = NULL; - mmdrop(oldmm); - } + next = list_entry(tmp, task_t, run_list); + activate_batch_task(next, rq); + } else + next = rq->idle; + rq->expired_timestamp = 0; + goto switch_tasks; } - /* - * This just switches the register state and the - * stack. - */ - switch_to(prev, next, prev); - __schedule_tail(prev); + array = rq->active; + if (unlikely(!array->nr_active)) { + /* + * Switch the active and expired arrays. 
+ */ + rq->active = rq->expired; + rq->expired = array; + array = rq->active; + rq->expired_timestamp = 0; + rq->best_expired_prio = MAX_PRIO; + } + + idx = sched_find_first_bit(array->bitmap); + queue = array->queue + idx; + next = list_entry(queue->next, task_t, run_list); + + if (next->activated > 0) { + unsigned long delta = jiffies - next->timestamp; + + if (next->activated == 1) + delta = delta * ON_RUNQUEUE_WEIGHT / 100; + + array = next->array; + dequeue_task(next, array); + recalc_task_prio(next, next->timestamp + delta); + enqueue_task(next, array); + } + next->activated = 0; +switch_tasks: + prefetch(next); + clear_tsk_need_resched(prev); + + prev->sleep_avg -= run_time; + if ((long)prev->sleep_avg <= 0){ + prev->sleep_avg = 0; + if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev))) + prev->interactive_credit--; + } + prev->timestamp = jiffies; + + if (likely(prev != next)) { + rq->nr_switches++; + rq->curr = next; + + prepare_arch_switch(rq, next); + prev = context_switch(prev, next); + barrier(); + rq = this_rq(); + finish_arch_switch(rq, prev); + } else + spin_unlock_irq(&rq->lock); -same_process: reacquire_kernel_lock(current); - if (current->need_resched) - goto need_resched_back; - return; + preempt_enable_no_resched(); + if (need_resched()) + goto need_resched; +} + +#ifdef CONFIG_PREEMPT +/* + * this is is the entry point to schedule() from in-kernel preemption + */ +asmlinkage void preempt_schedule(void) +{ + if (unlikely(irqs_disabled())) + return; + +need_resched: + current->preempt_count += PREEMPT_ACTIVE; + schedule(); + current->preempt_count -= PREEMPT_ACTIVE; + + /* we could miss a preemption opportunity between schedule and now */ + barrier(); + if (unlikely(current->need_resched)) + goto need_resched; } +#endif /* CONFIG_PREEMPT */ /* - * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything - * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the - * non-exclusive tasks and one exclusive task. + * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just + * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve + * number) then we wake all the non-exclusive tasks and one exclusive task. * * There are circumstances in which we can try to wake a task which has already - * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero - * in this (rare) case, and we handle it by contonuing to scan the queue. + * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns + * zero in this (rare) case, and we handle it by continuing to scan the queue. 
*/ -static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, const int sync) +static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync) { struct list_head *tmp; - struct task_struct *p; - - CHECK_MAGIC_WQHEAD(q); - WQ_CHECK_LIST_HEAD(&q->task_list); - - list_for_each(tmp,&q->task_list) { - unsigned int state; - wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); + unsigned int state; + wait_queue_t *curr; + task_t *p; - CHECK_MAGIC(curr->__magic); + list_for_each(tmp, &q->task_list) { + curr = list_entry(tmp, wait_queue_t, task_list); p = curr->task; state = p->state; - if (state & mode) { - WQ_NOTE_WAKER(curr); - if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + if ((state & mode) && try_to_wake_up(p, sync) && + ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)) break; - } } } -void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr) +void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) { - if (q) { - unsigned long flags; - wq_read_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr, 0); - wq_read_unlock_irqrestore(&q->lock, flags); - } + unsigned long flags; + + if (unlikely(!q)) + return; + + spin_lock_irqsave(&q->lock, flags); + __wake_up_common(q, mode, nr_exclusive, 0); + spin_unlock_irqrestore(&q->lock, flags); } -void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr) +#if CONFIG_SMP + +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) { - if (q) { - unsigned long flags; - wq_read_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr, 1); - wq_read_unlock_irqrestore(&q->lock, flags); - } + unsigned long flags; + + if (unlikely(!q)) + return; + + spin_lock_irqsave(&q->lock, flags); + if (likely(nr_exclusive)) + __wake_up_common(q, mode, nr_exclusive, 1); + else + __wake_up_common(q, mode, nr_exclusive, 0); + spin_unlock_irqrestore(&q->lock, flags); } +#endif + void fastcall complete(struct completion *x) { unsigned long flags; @@ -791,14 +1494,14 @@ init_waitqueue_entry(&wait, current); #define SLEEP_ON_HEAD \ - wq_write_lock_irqsave(&q->lock,flags); \ + spin_lock_irqsave(&q->lock,flags); \ __add_wait_queue(q, &wait); \ - wq_write_unlock(&q->lock); + spin_unlock(&q->lock); #define SLEEP_ON_TAIL \ - wq_write_lock_irq(&q->lock); \ + spin_lock_irq(&q->lock); \ __remove_wait_queue(q, &wait); \ - wq_write_unlock_irqrestore(&q->lock,flags); + spin_unlock_irqrestore(&q->lock,flags); void fastcall interruptible_sleep_on(wait_queue_head_t *q) { @@ -850,43 +1553,41 @@ void scheduling_functions_end_here(void) { } -#if CONFIG_SMP -/** - * set_cpus_allowed() - change a given task's processor affinity - * @p: task to bind - * @new_mask: bitmask of allowed processors - * - * Upon return, the task is running on a legal processor. Note the caller - * must have a valid reference to the task: it must not exit() prematurely. - * This call can sleep; do not hold locks on call. - */ -void set_cpus_allowed(struct task_struct *p, unsigned long new_mask) +void set_user_nice(task_t *p, long nice) { - new_mask &= cpu_online_map; - BUG_ON(!new_mask); - - p->cpus_allowed = new_mask; + unsigned long flags; + prio_array_t *array; + runqueue_t *rq; + if (TASK_NICE(p) == nice || nice < -20 || nice > 19) + return; /* - * If the task is on a no-longer-allowed processor, we need to move - * it. 
If the task is not current, then set need_resched and send - * its processor an IPI to reschedule. - */ - if (!(p->cpus_runnable & p->cpus_allowed)) { - if (p != current) { - p->need_resched = 1; - smp_send_reschedule(p->processor); - } + * We have to be careful, if called from sys_setpriority(), + * the task might be in the middle of scheduling on another CPU. + */ + rq = task_rq_lock(p, &flags); + if (rt_task(p)) { + p->static_prio = NICE_TO_PRIO(nice); + goto out_unlock; + } + array = p->array; + if (array) + dequeue_task(p, array); + p->static_prio = NICE_TO_PRIO(nice); + p->prio = NICE_TO_PRIO(nice); + if (array) { + enqueue_task(p, array); /* - * Wait until we are on a legal processor. If the task is - * current, then we should be on a legal processor the next - * time we reschedule. Otherwise, we need to wait for the IPI. + * If the task is running and lowered its priority, + * or increased its priority then reschedule its CPU: */ - while (!(p->cpus_runnable & p->cpus_allowed)) - schedule(); + if ((NICE_TO_PRIO(nice) < p->static_prio) || + task_running(rq, p)) + resched_task(rq->curr); } +out_unlock: + task_rq_unlock(rq, &flags); } -#endif /* CONFIG_SMP */ #ifndef __alpha__ @@ -898,7 +1599,7 @@ asmlinkage long sys_nice(int increment) { - long newprio; + long nice; /* * Setpriority might change our priority at the same moment. @@ -914,34 +1615,52 @@ if (increment > 40) increment = 40; - newprio = current->nice + increment; - if (newprio < -20) - newprio = -20; - if (newprio > 19) - newprio = 19; - current->nice = newprio; + nice = PRIO_TO_NICE(current->static_prio) + increment; + if (nice < -20) + nice = -20; + if (nice > 19) + nice = 19; + set_user_nice(current, nice); return 0; } #endif -static inline struct task_struct *find_process_by_pid(pid_t pid) +/* + * This is the priority value as seen by users in /proc + * + * RT tasks are offset by -200. Normal tasks are centered + * around 0, value goes from -16 to +15. + */ +int task_prio(task_t *p) +{ + return p->prio - MAX_USER_RT_PRIO; +} + +int task_nice(task_t *p) +{ + return TASK_NICE(p); +} + +int idle_cpu(int cpu) { - struct task_struct *tsk = current; + return cpu_curr(cpu) == cpu_rq(cpu)->idle; +} - if (pid) - tsk = find_task_by_pid(pid); - return tsk; +static inline task_t *find_process_by_pid(pid_t pid) +{ + return pid ? find_task_by_pid(pid) : current; } -static int setscheduler(pid_t pid, int policy, - struct sched_param *param) +static int setscheduler(pid_t pid, int policy, struct sched_param *param) { struct sched_param lp; - struct task_struct *p; - int retval; + int retval = -EINVAL; + prio_array_t *array; + unsigned long flags; + runqueue_t *rq; + task_t *p; - retval = -EINVAL; if (!param || pid < 0) goto out_nounlock; @@ -953,56 +1672,73 @@ * We play safe to avoid deadlocks. */ read_lock_irq(&tasklist_lock); - spin_lock(&runqueue_lock); p = find_process_by_pid(pid); retval = -ESRCH; if (!p) - goto out_unlock; - + goto out_unlock_tasklist; + + /* + * To be able to change p->policy safely, the apropriate + * runqueue lock must be held. + */ + rq = task_rq_lock(p, &flags); + if (policy < 0) policy = p->policy; else { retval = -EINVAL; if (policy != SCHED_FIFO && policy != SCHED_RR && - policy != SCHED_OTHER) + policy != SCHED_NORMAL && policy != SCHED_BATCH) goto out_unlock; } - + /* - * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid - * priority for SCHED_OTHER is 0. + * Valid priorities for SCHED_FIFO and SCHED_RR are + * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and + * SCHED_BATCH is 0. 
*/ retval = -EINVAL; - if (lp.sched_priority < 0 || lp.sched_priority > 99) + if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1) goto out_unlock; - if ((policy == SCHED_OTHER) != (lp.sched_priority == 0)) + if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) != + (lp.sched_priority == 0)) goto out_unlock; retval = -EPERM; - if ((policy == SCHED_FIFO || policy == SCHED_RR) && + if ((policy == SCHED_FIFO || policy == SCHED_RR) && !capable(CAP_SYS_NICE)) goto out_unlock; if ((current->euid != p->euid) && (current->euid != p->uid) && !capable(CAP_SYS_NICE)) goto out_unlock; + if (p->flags & PF_BATCH) + activate_batch_task(p, rq); + array = p->array; + if (array) + deactivate_task(p, task_rq(p)); retval = 0; p->policy = policy; p->rt_priority = lp.sched_priority; - - current->need_resched = 1; + if (policy != SCHED_NORMAL && policy != SCHED_BATCH) + p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority; + else + p->prio = p->static_prio; + if (array) + activate_task(p, task_rq(p)); out_unlock: - spin_unlock(&runqueue_lock); + task_rq_unlock(rq, &flags); +out_unlock_tasklist: read_unlock_irq(&tasklist_lock); out_nounlock: return retval; } -asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param) { return setscheduler(pid, policy, param); @@ -1015,10 +1751,9 @@ asmlinkage long sys_sched_getscheduler(pid_t pid) { - struct task_struct *p; - int retval; + int retval = -EINVAL; + task_t *p; - retval = -EINVAL; if (pid < 0) goto out_nounlock; @@ -1026,7 +1761,7 @@ read_lock(&tasklist_lock); p = find_process_by_pid(pid); if (p) - retval = p->policy & ~SCHED_YIELD; + retval = p->policy; read_unlock(&tasklist_lock); out_nounlock: @@ -1035,11 +1770,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param) { - struct task_struct *p; struct sched_param lp; - int retval; + int retval = -EINVAL; + task_t *p; - retval = -EINVAL; if (!param || pid < 0) goto out_nounlock; @@ -1064,44 +1798,121 @@ return retval; } -asmlinkage long sys_sched_yield(void) +/** + * sys_sched_setaffinity - set the cpu affinity of a process + * @pid: pid of the process + * @len: length in bytes of the bitmask pointed to by user_mask_ptr + * @user_mask_ptr: user-space pointer to the new cpu mask + */ +asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len, + unsigned long *user_mask_ptr) { + unsigned long new_mask; + int retval; + task_t *p; + + if (len < sizeof(new_mask)) + return -EINVAL; + + if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask))) + return -EFAULT; + + new_mask &= cpu_online_map; + if (!new_mask) + return -EINVAL; + + read_lock(&tasklist_lock); + + p = find_process_by_pid(pid); + if (!p) { + read_unlock(&tasklist_lock); + return -ESRCH; + } + /* - * Trick. sched_yield() first counts the number of truly - * 'pending' runnable processes, then returns if it's - * only the current processes. (This test does not have - * to be atomic.) In threaded applications this optimization - * gets triggered quite often. + * It is not safe to call set_cpus_allowed with the + * tasklist_lock held. We will bump the task_struct's + * usage count and then drop tasklist_lock. 
*/ + get_task_struct(p); + read_unlock(&tasklist_lock); - int nr_pending = nr_running; + retval = -EPERM; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !capable(CAP_SYS_NICE)) + goto out_unlock; -#if CONFIG_SMP - int i; + retval = 0; + set_cpus_allowed(p, new_mask); - // Subtract non-idle processes running on other CPUs. - for (i = 0; i < smp_num_cpus; i++) { - int cpu = cpu_logical_map(i); - if (aligned_data[cpu].schedule_data.curr != idle_task(cpu)) - nr_pending--; - } -#else - // on UP this process is on the runqueue as well - nr_pending--; -#endif - if (nr_pending) { - /* - * This process can only be rescheduled by us, - * so this is safe without any locking. - */ - if (current->policy == SCHED_OTHER) - current->policy |= SCHED_YIELD; - current->need_resched = 1; - - spin_lock_irq(&runqueue_lock); - move_last_runqueue(current); - spin_unlock_irq(&runqueue_lock); +out_unlock: + free_task_struct(p); + return retval; +} + +/** + * sys_sched_getaffinity - get the cpu affinity of a process + * @pid: pid of the process + * @len: length in bytes of the bitmask pointed to by user_mask_ptr + * @user_mask_ptr: user-space pointer to hold the current cpu mask + */ +asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len, + unsigned long *user_mask_ptr) +{ + unsigned int real_len; + unsigned long mask; + int retval; + task_t *p; + + real_len = sizeof(mask); + if (len < real_len) + return -EINVAL; + + read_lock(&tasklist_lock); + + retval = -ESRCH; + p = find_process_by_pid(pid); + if (!p) + goto out_unlock; + + retval = 0; + mask = p->cpus_allowed & cpu_online_map; + +out_unlock: + read_unlock(&tasklist_lock); + if (retval) + return retval; + if (copy_to_user(user_mask_ptr, &mask, real_len)) + return -EFAULT; + return real_len; +} + +asmlinkage long sys_sched_yield(void) +{ + runqueue_t *rq; + prio_array_t *array; + preempt_disable(); + rq = this_rq_lock(); + array = current->array; + + /* + * We implement yielding by moving the task into the expired + * queue. + * + * (special rule: RT tasks will just roundrobin in the active + * array.) + */ + if (likely(!rt_task(current))) { + dequeue_task(current, array); + enqueue_task(current, rq->expired); + } else { + list_del(¤t->run_list); + list_add_tail(¤t->run_list, array->queue + current->prio); } + spin_unlock(&rq->lock); + + schedule(); + return 0; } @@ -1131,9 +1942,10 @@ switch (policy) { case SCHED_FIFO: case SCHED_RR: - ret = 99; + ret = MAX_USER_RT_PRIO-1; break; - case SCHED_OTHER: + case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; break; } @@ -1149,7 +1961,8 @@ case SCHED_RR: ret = 1; break; - case SCHED_OTHER: + case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; } return ret; @@ -1157,9 +1970,9 @@ asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) { - struct timespec t; - struct task_struct *p; int retval = -EINVAL; + struct timespec t; + task_t *p; if (pid < 0) goto out_nounlock; @@ -1168,8 +1981,8 @@ read_lock(&tasklist_lock); p = find_process_by_pid(pid); if (p) - jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice), - &t); + jiffies_to_timespec(p->policy & SCHED_FIFO ? + 0 : task_timeslice(p), &t); read_unlock(&tasklist_lock); if (p) retval = copy_to_user(interval, &t, sizeof(t)) ? 
-EFAULT : 0; @@ -1177,14 +1990,14 @@ return retval; } -static void show_task(struct task_struct * p) +static void show_task(task_t * p) { unsigned long free = 0; int state; static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; printk("%-13.13s ", p->comm); - state = p->state ? ffz(~p->state) + 1 : 0; + state = p->state ? __ffs(p->state) + 1 : 0; if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *)) printk(stat_nam[state]); else @@ -1225,7 +2038,7 @@ printk(" (NOTLB)\n"); { - extern void show_trace_task(struct task_struct *tsk); + extern void show_trace_task(task_t *tsk); show_trace_task(p); } } @@ -1247,7 +2060,7 @@ void show_state(void) { - struct task_struct *p; + task_t *p; #if (BITS_PER_LONG == 32) printk("\n" @@ -1270,121 +2083,251 @@ read_unlock(&tasklist_lock); } -/** - * reparent_to_init() - Reparent the calling kernel thread to the init task. - * - * If a kernel thread is launched as a result of a system call, or if - * it ever exits, it should generally reparent itself to init so that - * it is correctly cleaned up on exit. - * - * The various task state such as scheduling policy and priority may have - * been inherited fro a user process, so we reset them to sane values here. - * - * NOTE that reparent_to_init() gives the caller full capabilities. - */ -void reparent_to_init(void) +void __init init_idle(task_t *idle, int cpu) { - struct task_struct *this_task = current; + runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle)); + unsigned long flags; + + __save_flags(flags); + __cli(); + double_rq_lock(idle_rq, rq); + + idle_rq->curr = idle_rq->idle = idle; + deactivate_task(idle, rq); + idle->array = NULL; + idle->prio = MAX_PRIO; + idle->state = TASK_RUNNING; + set_task_cpu(idle, cpu); + double_rq_unlock(idle_rq, rq); + set_tsk_need_resched(idle); + __restore_flags(flags); + + /* Set the preempt count _outside_ the spinlocks! */ + idle->preempt_count = (idle->lock_depth >= 0); +} - write_lock_irq(&tasklist_lock); +#if CONFIG_SMP - /* Reparent to init */ - REMOVE_LINKS(this_task); - this_task->p_pptr = child_reaper; - this_task->p_opptr = child_reaper; - SET_LINKS(this_task); +/* + * This is how migration works: + * + * 1) we queue a migration_req_t structure in the source CPU's + * runqueue and wake up that CPU's migration thread. + * 2) we down() the locked semaphore => thread blocks. + * 3) migration thread wakes up (implicitly it forces the migrated + * thread off the CPU) + * 4) it gets the migration request and checks whether the migrated + * task is still in the wrong runqueue. + * 5) if it's in the wrong runqueue then the migration thread removes + * it and puts it into the right queue. + * 6) migration thread up()s the semaphore. + * 7) we wake up and the migration is done. + */ + +typedef struct { + list_t list; + task_t *task; + struct semaphore sem; +} migration_req_t; - /* Set the exit signal to SIGCHLD so we signal init on exit */ - this_task->exit_signal = SIGCHLD; +/* + * Change a given task's CPU affinity. Migrate the process to a + * proper CPU and schedule it away if the CPU it's executing on + * is removed from the allowed bitmask. + * + * NOTE: the caller must have a valid reference to the task, the + * task must not exit() & deallocate itself prematurely. The + * call is not atomic; no spinlocks may be held. 
+ */ +void set_cpus_allowed(task_t *p, unsigned long new_mask) +{ + unsigned long flags; + migration_req_t req; + runqueue_t *rq; - /* We also take the runqueue_lock while altering task fields - * which affect scheduling decisions */ - spin_lock(&runqueue_lock); + new_mask &= cpu_online_map; + if (!new_mask) + BUG(); - this_task->ptrace = 0; - this_task->nice = DEF_NICE; - this_task->policy = SCHED_OTHER; - /* cpus_allowed? */ - /* rt_priority? */ - /* signals? */ - this_task->cap_effective = CAP_INIT_EFF_SET; - this_task->cap_inheritable = CAP_INIT_INH_SET; - this_task->cap_permitted = CAP_FULL_SET; - this_task->keep_capabilities = 0; - memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim))); - switch_uid(INIT_USER); + preempt_disable(); + rq = task_rq_lock(p, &flags); + p->cpus_allowed = new_mask; + /* + * Can the task run on the task's current CPU? If not then + * migrate the process off to a proper CPU. + */ + if (new_mask & (1UL << task_cpu(p))) { + task_rq_unlock(rq, &flags); + goto out; + } + /* + * If the task is not on a runqueue (and not running), then + * it is sufficient to simply update the task's cpu field. + */ + if (!p->array && !task_running(rq, p)) { + set_task_cpu(p, __ffs(p->cpus_allowed)); + task_rq_unlock(rq, &flags); + goto out; + } + init_MUTEX_LOCKED(&req.sem); + req.task = p; + list_add(&req.list, &rq->migration_queue); + task_rq_unlock(rq, &flags); + wake_up_process(rq->migration_thread); - spin_unlock(&runqueue_lock); - write_unlock_irq(&tasklist_lock); + down(&req.sem); +out: + preempt_enable(); + return; } -/* - * Put all the gunge required to become a kernel thread without - * attached user resources in one place where it belongs. - */ +static __initdata int master_migration_thread; -void daemonize(void) +static int migration_thread(void * bind_cpu) { - struct fs_struct *fs; + int cpu = (int) (long) bind_cpu; + struct sched_param param = { sched_priority: MAX_RT_PRIO-1 }; + runqueue_t *rq; + int ret; + daemonize(); + sigfillset(¤t->blocked); + set_fs(KERNEL_DS); /* - * If we were started as result of loading a module, close all of the - * user space pages. We don't need them, and if we didn't close them - * they would be locked into memory. + * The first migration thread is started on the boot CPU, it + * migrates the other migration threads to their destination CPUs. 
*/ - exit_mm(current); + if (cpu != master_migration_thread) { + while (!cpu_rq(master_migration_thread)->migration_thread) + yield(); + set_cpus_allowed(current, 1UL << cpu); + } + printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id()); + ret = setscheduler(0, SCHED_FIFO, ¶m); - current->session = 1; - current->pgrp = 1; - current->tty = NULL; + rq = this_rq(); + rq->migration_thread = current; - /* Become as one with the init task */ + sprintf(current->comm, "migration_CPU%d", smp_processor_id()); - exit_fs(current); /* current->fs->count--; */ - fs = init_task.fs; - current->fs = fs; - atomic_inc(&fs->count); - exit_files(current); - current->files = init_task.files; - atomic_inc(¤t->files->count); -} + for (;;) { + runqueue_t *rq_src, *rq_dest; + struct list_head *head; + int cpu_src, cpu_dest; + migration_req_t *req; + unsigned long flags; + task_t *p; + + spin_lock_irqsave(&rq->lock, flags); + head = &rq->migration_queue; + current->state = TASK_INTERRUPTIBLE; + if (list_empty(head)) { + spin_unlock_irqrestore(&rq->lock, flags); + schedule(); + continue; + } + req = list_entry(head->next, migration_req_t, list); + list_del_init(head->next); + spin_unlock_irqrestore(&rq->lock, flags); + + p = req->task; + cpu_dest = __ffs(p->cpus_allowed); + rq_dest = cpu_rq(cpu_dest); +repeat: + cpu_src = task_cpu(p); + rq_src = cpu_rq(cpu_src); + + local_irq_save(flags); + double_rq_lock(rq_src, rq_dest); + if (task_cpu(p) != cpu_src) { + double_rq_unlock(rq_src, rq_dest); + local_irq_restore(flags); + goto repeat; + } + if (rq_src == rq) { + set_task_cpu(p, cpu_dest); + if (p->array) { + deactivate_task(p, rq_src); + activate_task(p, rq_dest); + } + } + double_rq_unlock(rq_src, rq_dest); + local_irq_restore(flags); -extern unsigned long wait_init_idle; + up(&req->sem); + } +} -void __init init_idle(void) +void __init migration_init(void) { - struct schedule_data * sched_data; - sched_data = &aligned_data[smp_processor_id()].schedule_data; + int cpu; + + master_migration_thread = smp_processor_id(); + current->cpus_allowed = 1UL << master_migration_thread; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu)) + continue; + if (kernel_thread(migration_thread, (void *) (long) cpu, + CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0) + BUG(); + } + current->cpus_allowed = -1L; - if (current != &init_task && task_on_runqueue(current)) { - printk("UGH! 
(%d:%d) was on the runqueue, removing.\n", - smp_processor_id(), current->pid); - del_from_runqueue(current); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu)) + continue; + while (!cpu_rq(cpu)->migration_thread) + schedule_timeout(2); } - sched_data->curr = current; - sched_data->last_schedule = get_cycles(); - clear_bit(current->processor, &wait_init_idle); } +#endif -extern void init_timervecs (void); +extern void init_timervecs(void); +extern void timer_bh(void); +extern void tqueue_bh(void); +extern void immediate_bh(void); void __init sched_init(void) { + runqueue_t *rq; + int i, j, k; + + for (i = 0; i < NR_CPUS; i++) { + prio_array_t *array; + + rq = cpu_rq(i); + rq->active = rq->arrays; + rq->expired = rq->arrays + 1; + rq->best_expired_prio = MAX_PRIO; + + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->migration_queue); + INIT_LIST_HEAD(&rq->batch_queue); + rq->idle_ticks_left = IDLE_TICKS; + + for (j = 0; j < 2; j++) { + array = rq->arrays + j; + for (k = 0; k < MAX_PRIO; k++) { + INIT_LIST_HEAD(array->queue + k); + __clear_bit(k, array->bitmap); + } + // delimiter for bitsearch + __set_bit(MAX_PRIO, array->bitmap); + } + } /* * We have to do a little magic to get the first * process right in SMP mode. */ - int cpu = smp_processor_id(); - int nr; - - init_task.processor = cpu; - - for(nr = 0; nr < PIDHASH_SZ; nr++) - pidhash[nr] = NULL; + rq = this_rq(); + rq->curr = current; + rq->idle = current; + set_task_cpu(current, smp_processor_id()); + wake_up_process(current); init_timervecs(); - init_bh(TIMER_BH, timer_bh); init_bh(TQUEUE_BH, tqueue_bh); init_bh(IMMEDIATE_BH, immediate_bh); @@ -1393,5 +2336,94 @@ * The boot idle thread does lazy MMU switching as well: */ atomic_inc(&init_mm.mm_count); - enter_lazy_tlb(&init_mm, current, cpu); + enter_lazy_tlb(&init_mm, current, smp_processor_id()); } + +#if LOWLATENCY_NEEDED +#if LOWLATENCY_DEBUG + +static struct lolat_stats_t *lolat_stats_head; +static spinlock_t lolat_stats_lock = SPIN_LOCK_UNLOCKED; + +void set_running_and_schedule(struct lolat_stats_t *stats) +{ + spin_lock(&lolat_stats_lock); + if (stats->visited == 0) { + stats->visited = 1; + stats->next = lolat_stats_head; + lolat_stats_head = stats; + } + stats->count++; + spin_unlock(&lolat_stats_lock); + + if (current->state != TASK_RUNNING) + set_current_state(TASK_RUNNING); + schedule(); +} + +void show_lolat_stats(void) +{ + struct lolat_stats_t *stats = lolat_stats_head; + + printk("Low latency scheduling stats:\n"); + while (stats) { + printk("%s:%d: %lu\n", stats->file, stats->line, stats->count); + stats->count = 0; + stats = stats->next; + } +} + +#else /* LOWLATENCY_DEBUG */ + +void set_running_and_schedule() +{ + if (current->state != TASK_RUNNING) + __set_current_state(TASK_RUNNING); + schedule(); +} + +#endif /* LOWLATENCY_DEBUG */ + +int ll_copy_to_user(void *to_user, const void *from, unsigned long len) +{ + while (len) { + unsigned long n_to_copy = len; + unsigned long remainder; + + if (n_to_copy > 4096) + n_to_copy = 4096; + remainder = copy_to_user(to_user, from, n_to_copy); + if (remainder) + return remainder + len; + to_user = ((char *)to_user) + n_to_copy; + from = ((char *)from) + n_to_copy; + len -= n_to_copy; + conditional_schedule(); + } + return 0; +} + +int ll_copy_from_user(void *to, const void *from_user, unsigned long len) +{ + while (len) { + unsigned long n_to_copy = len; + unsigned long remainder; + + if (n_to_copy > 4096) + n_to_copy = 4096; + remainder = copy_from_user(to, from_user, n_to_copy); + if (remainder) + return 
remainder + len;
+        to = ((char *)to) + n_to_copy;
+        from_user = ((char *)from_user) + n_to_copy;
+        len -= n_to_copy;
+        conditional_schedule();
+    }
+    return 0;
+}
+
+#ifdef CONFIG_LOLAT_SYSCTL
+struct low_latency_enable_struct __enable_lowlatency = { 0, };
+#endif
+
+#endif /* LOWLATENCY_NEEDED */
diff -Nur linux-2.4.33-imedia/kernel/signal.c linux-2.4.33-imedia-patching/kernel/signal.c
--- linux-2.4.33-imedia/kernel/signal.c 2004-02-18 15:36:32.000000000 +0200
+++ linux-2.4.33-imedia-patching/kernel/signal.c 2006-01-26 15:19:43.000000000 +0200
@@ -507,11 +507,9 @@
      * process of changing - but no harm is done by that
      * other than doing an extra (lightweight) IPI interrupt.
      */
-    spin_lock(&runqueue_lock);
-    if (task_has_cpu(t) && t->processor != smp_processor_id())
-        smp_send_reschedule(t->processor);
-    spin_unlock(&runqueue_lock);
-#endif /* CONFIG_SMP */
+    if ((t->state == TASK_RUNNING) && (t->cpu != cpu()))
+        kick_if_running(t);
+#endif
     if (t->state & TASK_INTERRUPTIBLE) {
         wake_up_process(t);
diff -Nur linux-2.4.33-imedia/kernel/softirq.c linux-2.4.33-imedia-patching/kernel/softirq.c
--- linux-2.4.33-imedia/kernel/softirq.c 2004-11-17 13:54:22.000000000 +0200
+++ linux-2.4.33-imedia-patching/kernel/softirq.c 2006-01-26 15:19:43.000000000 +0200
@@ -60,7 +60,7 @@
 asmlinkage void do_softirq()
 {
-    int cpu = smp_processor_id();
+    int cpu;
     __u32 pending;
     unsigned long flags;
     __u32 mask;
@@ -70,6 +70,8 @@
     local_irq_save(flags);
+    cpu = smp_processor_id();
+
     pending = softirq_pending(cpu);
     if (pending) {
@@ -99,10 +101,11 @@
         mask &= ~pending;
         goto restart;
     }
-    __local_bh_enable();
     if (pending)
         wakeup_softirqd(cpu);
+
+    __local_bh_enable();
 }
 local_irq_restore(flags);
@@ -151,10 +154,11 @@
 void fastcall __tasklet_schedule(struct tasklet_struct *t)
 {
-    int cpu = smp_processor_id();
+    int cpu;
     unsigned long flags;
     local_irq_save(flags);
+    cpu = smp_processor_id();
     t->next = tasklet_vec[cpu].list;
     tasklet_vec[cpu].list = t;
     cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
@@ -175,10 +179,11 @@
 static void tasklet_action(struct softirq_action *a)
 {
-    int cpu = smp_processor_id();
+    int cpu;
     struct tasklet_struct *list;
     local_irq_disable();
+    cpu = smp_processor_id();
     list = tasklet_vec[cpu].list;
     tasklet_vec[cpu].list = NULL;
     local_irq_enable();
@@ -209,10 +214,11 @@
 static void tasklet_hi_action(struct softirq_action *a)
 {
-    int cpu = smp_processor_id();
+    int cpu;
     struct tasklet_struct *list;
     local_irq_disable();
+    cpu = smp_processor_id();
     list = tasklet_hi_vec[cpu].list;
     tasklet_hi_vec[cpu].list = NULL;
     local_irq_enable();
@@ -364,13 +370,13 @@
     int cpu = cpu_logical_map(bind_cpu);
     daemonize();
-    current->nice = 19;
+    set_user_nice(current, 19);
     sigfillset(&current->blocked);
     /* Migrate to the right CPU */
-    current->cpus_allowed = 1UL << cpu;
-    while (smp_processor_id() != cpu)
-        schedule();
+    set_cpus_allowed(current, 1UL << cpu);
+    if (cpu() != cpu)
+        BUG();
     sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
@@ -395,7 +401,7 @@
     }
 }
-static __init int spawn_ksoftirqd(void)
+__init int spawn_ksoftirqd(void)
 {
     int cpu;
diff -Nur linux-2.4.33-imedia/kernel/sys.c linux-2.4.33-imedia-patching/kernel/sys.c
--- linux-2.4.33-imedia/kernel/sys.c 2003-11-28 20:26:21.000000000 +0200
+++ linux-2.4.33-imedia-patching/kernel/sys.c 2006-01-26 15:19:43.000000000 +0200
@@ -225,7 +225,7 @@
         error = -ESRCH;
         if (niceval < -20)
             niceval = -20;
-        if (niceval > 19)
+        if (niceval > 18)
             niceval = 19;
         read_lock(&tasklist_lock);
@@ -239,10 +239,13 @@
         }
         if (error == -ESRCH)
             error = 0;
-        if (niceval < p->nice && !capable(CAP_SYS_NICE))
+
if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) error = -EACCES; - else - p->nice = niceval; + else { + if (niceval > 18 && p->mm) + p->policy = SCHED_BATCH; + set_user_nice(p, niceval); + } } read_unlock(&tasklist_lock); @@ -268,7 +271,7 @@ long niceval; if (!proc_sel(p, which, who)) continue; - niceval = 20 - p->nice; + niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; } @@ -320,6 +323,7 @@ notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); printk(KERN_EMERG "System halted.\n"); machine_halt(); + unlock_kernel(); do_exit(0); break; @@ -327,6 +331,7 @@ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); printk(KERN_EMERG "Power down.\n"); machine_power_off(); + unlock_kernel(); do_exit(0); break; diff -Nur linux-2.4.33-imedia/kernel/sysctl.c linux-2.4.33-imedia-patching/kernel/sysctl.c --- linux-2.4.33-imedia/kernel/sysctl.c 2006-01-11 20:29:28.000000000 +0200 +++ linux-2.4.33-imedia-patching/kernel/sysctl.c 2006-01-26 15:19:43.000000000 +0200 @@ -278,6 +278,10 @@ {KERN_EXCEPTION_TRACE,"exception-trace", &exception_trace,sizeof(int),0644,NULL,&proc_dointvec}, #endif +#ifdef CONFIG_LOLAT_SYSCTL + {KERN_LOWLATENCY, "lowlatency", &enable_lowlatency, sizeof (int), + 0644, NULL, &proc_dointvec}, +#endif {0} }; diff -Nur linux-2.4.33-imedia/kernel/timer.c linux-2.4.33-imedia-patching/kernel/timer.c --- linux-2.4.33-imedia/kernel/timer.c 2002-11-29 01:53:15.000000000 +0200 +++ linux-2.4.33-imedia-patching/kernel/timer.c 2006-01-26 15:19:43.000000000 +0200 @@ -25,6 +25,8 @@ #include +struct kernel_stat kstat; + /* * Timekeeping variables */ @@ -598,25 +600,7 @@ int cpu = smp_processor_id(), system = user_tick ^ 1; update_one_process(p, user_tick, system, cpu); - if (p->pid) { - if (--p->counter <= 0) { - p->counter = 0; - /* - * SCHED_FIFO is priority preemption, so this is - * not the place to decide whether to reschedule a - * SCHED_FIFO task or not - Bhavesh Davda - */ - if (p->policy != SCHED_FIFO) { - p->need_resched = 1; - } - } - if (p->nice > 0) - kstat.per_cpu_nice[cpu] += user_tick; - else - kstat.per_cpu_user[cpu] += user_tick; - kstat.per_cpu_system[cpu] += system; - } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) - kstat.per_cpu_system[cpu] += system; + scheduler_tick(user_tick, system); } /* @@ -624,17 +608,7 @@ */ static unsigned long count_active_tasks(void) { - struct task_struct *p; - unsigned long nr = 0; - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((p->state == TASK_RUNNING || - (p->state & TASK_UNINTERRUPTIBLE))) - nr += FIXED_1; - } - read_unlock(&tasklist_lock); - return nr; + return (nr_running() + nr_uninterruptible()) * FIXED_1; } /* @@ -827,6 +801,89 @@ #endif +static void process_timeout(unsigned long __data) +{ + wake_up_process((task_t *)__data); +} + +/** + * schedule_timeout - sleep until timeout + * @timeout: timeout value in jiffies + * + * Make the current task sleep until @timeout jiffies have + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to + * pass before the routine returns. The routine will return 0 + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. 
In this case the remaining time + * in jiffies will be returned, or 0 if the timer expired in time + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule + * the CPU away without a bound on the timeout. In this case the return + * value will be %MAX_SCHEDULE_TIMEOUT. + * + * In all cases the return value is guaranteed to be non-negative. + */ +signed long fastcall schedule_timeout(signed long timeout) +{ + struct timer_list timer; + unsigned long expire; + + switch (timeout) + { + case MAX_SCHEDULE_TIMEOUT: + /* + * These two special cases are useful to be comfortable + * in the caller. Nothing more. We could take + * MAX_SCHEDULE_TIMEOUT from one of the negative value + * but I' d like to return a valid offset (>=0) to allow + * the caller to do everything it want with the retval. + */ + schedule(); + goto out; + default: + /* + * Another bit of PARANOID. Note that the retval will be + * 0 since no piece of kernel is supposed to do a check + * for a negative retval of schedule_timeout() (since it + * should never happens anyway). You just have the printk() + * that will tell you if something is gone wrong and where. + */ + if (timeout < 0) + { + printk(KERN_ERR "schedule_timeout: wrong timeout " + "value %lx from %p\n", timeout, + __builtin_return_address(0)); + current->state = TASK_RUNNING; + goto out; + } + } + + expire = timeout + jiffies; + + init_timer(&timer); + timer.expires = expire; + timer.data = (unsigned long) current; + timer.function = process_timeout; + + add_timer(&timer); + schedule(); + del_timer_sync(&timer); + + timeout = expire - jiffies; + +out: + return timeout < 0 ? 0 : timeout; +} + /* Thread ID - the internal kernel "pid" */ asmlinkage long sys_gettid(void) { @@ -873,4 +930,3 @@ } return 0; } - diff -Nur linux-2.4.33-imedia/lib/dec_and_lock.c linux-2.4.33-imedia-patching/lib/dec_and_lock.c --- linux-2.4.33-imedia/lib/dec_and_lock.c 2001-10-03 19:11:26.000000000 +0300 +++ linux-2.4.33-imedia-patching/lib/dec_and_lock.c 2006-01-26 15:19:43.000000000 +0200 @@ -1,5 +1,6 @@ #include #include +#include #include /* diff -Nur linux-2.4.33-imedia/mm/filemap.c linux-2.4.33-imedia-patching/mm/filemap.c --- linux-2.4.33-imedia/mm/filemap.c 2005-06-01 03:56:56.000000000 +0300 +++ linux-2.4.33-imedia-patching/mm/filemap.c 2006-01-26 15:19:43.000000000 +0200 @@ -185,7 +185,9 @@ { struct list_head *head, *curr; struct page * page; + int ll_count = 100; +restart: head = &inode->i_mapping->clean_pages; spin_lock(&pagemap_lru_lock); @@ -196,6 +198,14 @@ page = list_entry(curr, struct page, list); curr = curr->next; + if (conditional_schedule_needed() && ll_count) { + spin_unlock(&pagecache_lock); + spin_unlock(&pagemap_lru_lock); + unconditional_schedule(); + ll_count--; + goto restart; + } + /* We cannot invalidate something in dirty.. 
*/ if (PageDirty(page)) continue; @@ -259,8 +269,8 @@ page_cache_release(page); } -static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *)); -static int fastcall truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial) +static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *, int *)); +static int fastcall truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial, int *restart_count) { struct list_head *curr; struct page * page; @@ -271,6 +281,17 @@ while (curr != head) { unsigned long offset; + if (conditional_schedule_needed() && *restart_count) { + (*restart_count)--; + list_del(head); + list_add(head, curr); /* Restart on this page */ + spin_unlock(&pagecache_lock); + unconditional_schedule(); + spin_lock(&pagecache_lock); + unlocked = 1; + goto restart; + } + page = list_entry(curr, struct page, list); offset = page->index; @@ -303,13 +324,11 @@ } else wait_on_page(page); - page_cache_release(page); - - if (current->need_resched) { - __set_current_state(TASK_RUNNING); - schedule(); + if (LOWLATENCY_NEEDED) { + *restart_count = 4; /* We made progress */ } + page_cache_release(page); spin_lock(&pagecache_lock); goto restart; } @@ -332,13 +351,14 @@ { unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); + int restart_count = 4; int unlocked; spin_lock(&pagecache_lock); do { - unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial); - unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial); - unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial); + unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial, &restart_count); + unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial, &restart_count); + unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial, &restart_count); } while (unlocked); /* Traversed all three lists without dropping the lock */ spin_unlock(&pagecache_lock); @@ -483,6 +503,7 @@ page_cache_get(page); spin_unlock(&pagecache_lock); + conditional_schedule(); /* sys_msync() (only used by minixfs, udf) */ lock_page(page); /* The buffers could have been free'd while we waited for the page lock */ @@ -612,12 +633,14 @@ list_del(&page->list); list_add(&page->list, &mapping->locked_pages); - if (!PageDirty(page)) - continue; - page_cache_get(page); spin_unlock(&pagecache_lock); + conditional_schedule(); /* sys_msync() */ + + if (!PageDirty(page)) + goto clean; + lock_page(page); if (PageDirty(page)) { @@ -628,7 +651,7 @@ ret = err; } else UnlockPage(page); - +clean: page_cache_release(page); spin_lock(&pagecache_lock); } @@ -646,7 +669,8 @@ int filemap_fdatawait(struct address_space * mapping) { int ret = 0; - + DEFINE_RESCHED_COUNT; +restart: spin_lock(&pagecache_lock); while (!list_empty(&mapping->locked_pages)) { @@ -655,6 +679,17 @@ list_del(&page->list); list_add(&page->list, &mapping->clean_pages); + if (TEST_RESCHED_COUNT(32)) { + RESET_RESCHED_COUNT(); + if (conditional_schedule_needed()) { + page_cache_get(page); + spin_unlock(&pagecache_lock); + unconditional_schedule(); + page_cache_release(page); + goto restart; + } + } + if (!PageLocked(page)) continue; @@ -764,8 +799,10 @@ spin_lock(&pagecache_lock); page = __find_page_nolock(mapping, offset, *hash); spin_unlock(&pagecache_lock); - if (page) + if (page) { + conditional_schedule(); return 0; + } page = page_cache_alloc(mapping); if (!page) @@ 
-1035,6 +1072,11 @@ * the hash-list needs a held write-lock. */ repeat: + if (conditional_schedule_needed()) { + spin_unlock(&pagecache_lock); + unconditional_schedule(); + spin_lock(&pagecache_lock); + } page = __find_page_nolock(mapping, offset, hash); if (page) { page_cache_get(page); @@ -1488,6 +1530,8 @@ page_cache_get(page); spin_unlock(&pagecache_lock); + conditional_schedule(); /* sys_read() */ + if (!Page_Uptodate(page)) goto page_not_up_to_date; generic_file_readahead(reada_ok, filp, inode, page); @@ -2247,6 +2291,12 @@ address += PAGE_SIZE; pte++; } while (address && (address < end)); + + if (conditional_schedule_needed()) { + spin_unlock(&vma->vm_mm->page_table_lock); + unconditional_schedule(); /* syncing large mapped files */ + spin_lock(&vma->vm_mm->page_table_lock); + } return error; } @@ -2658,7 +2708,9 @@ if (vma->vm_flags & VM_LOCKED) return -EINVAL; - zap_page_range(vma->vm_mm, start, end - start); + zap_page_range(vma->vm_mm, start, end - start, + ZPR_COND_RESCHED); /* sys_madvise(MADV_DONTNEED) */ + return 0; } @@ -3228,6 +3280,9 @@ goto sync_failure; page_fault = __copy_from_user(kaddr+offset, buf, bytes); flush_dcache_page(page); + + conditional_schedule(); + status = mapping->a_ops->commit_write(file, page, offset, offset+bytes); if (page_fault) goto fail_write; diff -Nur linux-2.4.33-imedia/mm/memory.c linux-2.4.33-imedia-patching/mm/memory.c --- linux-2.4.33-imedia/mm/memory.c 2005-04-04 04:42:20.000000000 +0300 +++ linux-2.4.33-imedia-patching/mm/memory.c 2006-01-26 15:19:43.000000000 +0200 @@ -357,7 +357,7 @@ /* * remove user pages in a given range. */ -void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) +static void do_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) { mmu_gather_t *tlb; pgd_t * dir; @@ -478,6 +478,10 @@ struct page *map; while (!(map = follow_page(mm, start, write))) { spin_unlock(&mm->page_table_lock); + + /* Pinning down many physical pages (kiobufs, mlockall) */ + conditional_schedule(); + switch (handle_mm_fault(mm, vma, start, write)) { case 1: tsk->min_flt++; @@ -641,6 +645,21 @@ iobuf->locked = 0; } +#define MAX_ZAP_BYTES 256*PAGE_SIZE + +void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions) +{ + while (size) { + unsigned long chunk = size; + if (actions & ZPR_COND_RESCHED && chunk > MAX_ZAP_BYTES) + chunk = MAX_ZAP_BYTES; + do_zap_page_range(mm, address, chunk); + if (actions & ZPR_COND_RESCHED) + conditional_schedule(); + address += chunk; + size -= chunk; + } +} /* * Lock down all of the pages of a kiovec for IO. @@ -750,11 +769,18 @@ return 0; } -static inline void zeromap_pte_range(pte_t * pte, unsigned long address, - unsigned long size, pgprot_t prot) +static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte, + unsigned long address, unsigned long size, + pgprot_t prot) { unsigned long end; + if (conditional_schedule_needed()) { + spin_unlock(&mm->page_table_lock); + unconditional_schedule(); /* mmap(/dev/zero) */ + spin_lock(&mm->page_table_lock); + } + address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) @@ -782,7 +808,7 @@ pte_t * pte = pte_alloc(mm, pmd, address); if (!pte) return -ENOMEM; - zeromap_pte_range(pte, address, end - address, prot); + zeromap_pte_range(mm, pte, address, end - address, prot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -1017,7 +1043,7 @@ /* mapping wholly truncated? 
*/ if (mpnt->vm_pgoff >= pgoff) { - zap_page_range(mm, start, len); + zap_page_range(mm, start, len, 0); continue; } @@ -1030,7 +1056,7 @@ /* Ok, partially affected.. */ start += diff << PAGE_SHIFT; len = (len - diff) << PAGE_SHIFT; - zap_page_range(mm, start, len); + zap_page_range(mm, start, len, 0); } while ((mpnt = mpnt->vm_next_share) != NULL); } diff -Nur linux-2.4.33-imedia/mm/mmap.c linux-2.4.33-imedia-patching/mm/mmap.c --- linux-2.4.33-imedia/mm/mmap.c 2005-01-19 16:10:13.000000000 +0200 +++ linux-2.4.33-imedia-patching/mm/mmap.c 2006-01-26 15:19:43.000000000 +0200 @@ -600,7 +600,7 @@ fput(file); /* Undo any partial mapping done by a device driver. */ - zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start); + zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, 0); free_vma: kmem_cache_free(vm_area_cachep, vma); return error; @@ -1000,7 +1000,7 @@ remove_shared_vm_struct(mpnt); mm->map_count--; - zap_page_range(mm, st, size); + zap_page_range(mm, st, size, ZPR_COND_RESCHED); /* sys_munmap() */ /* * Fix the mapping, and free the old area if it wasn't reused. @@ -1175,7 +1175,7 @@ } mm->map_count--; remove_shared_vm_struct(mpnt); - zap_page_range(mm, start, size); + zap_page_range(mm, start, size, ZPR_COND_RESCHED); /* sys_exit() */ if (mpnt->vm_file) fput(mpnt->vm_file); kmem_cache_free(vm_area_cachep, mpnt); diff -Nur linux-2.4.33-imedia/mm/mremap.c linux-2.4.33-imedia-patching/mm/mremap.c --- linux-2.4.33-imedia/mm/mremap.c 2005-01-19 16:10:13.000000000 +0200 +++ linux-2.4.33-imedia-patching/mm/mremap.c 2006-01-26 15:19:43.000000000 +0200 @@ -122,7 +122,7 @@ flush_cache_range(mm, new_addr, new_addr + len); while ((offset += PAGE_SIZE) < len) move_one_page(mm, new_addr + offset, old_addr + offset); - zap_page_range(mm, new_addr, len); + zap_page_range(mm, new_addr, len, 0); return -1; } diff -Nur linux-2.4.33-imedia/mm/oom_kill.c linux-2.4.33-imedia-patching/mm/oom_kill.c --- linux-2.4.33-imedia/mm/oom_kill.c 2004-11-17 13:54:22.000000000 +0200 +++ linux-2.4.33-imedia-patching/mm/oom_kill.c 2006-01-26 15:19:43.000000000 +0200 @@ -86,7 +86,7 @@ * Niced processes are most likely less important, so double * their badness points. */ - if (p->nice > 0) + if (task_nice(p) > 0) points *= 2; /* @@ -150,7 +150,7 @@ * all the memory it needs. That way it should be able to * exit() and clear out its resources quickly... */ - p->counter = 5 * HZ; + p->time_slice = HZ; p->flags |= PF_MEMALLOC | PF_MEMDIE; /* This process has hardware access, be more careful. */ diff -Nur linux-2.4.33-imedia/mm/slab.c linux-2.4.33-imedia-patching/mm/slab.c --- linux-2.4.33-imedia/mm/slab.c 2004-11-17 13:54:22.000000000 +0200 +++ linux-2.4.33-imedia-patching/mm/slab.c 2006-01-26 15:19:43.000000000 +0200 @@ -49,7 +49,8 @@ * constructors and destructors are called without any locking. * Several members in kmem_cache_t and slab_t never change, they * are accessed without any locking. - * The per-cpu arrays are never accessed from the wrong cpu, no locking. + * The per-cpu arrays are never accessed from the wrong cpu, no locking, + * and local interrupts are disabled so slab code is preempt-safe. * The non-constant members are protected with a per-cache irq spinlock. 
* * Further notes from the original documentation: @@ -858,12 +859,14 @@ */ static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) { + preempt_disable(); local_irq_disable(); func(arg); local_irq_enable(); if (smp_call_function(func, arg, 1, 1)) BUG(); + preempt_enable(); } typedef struct ccupdate_struct_s { @@ -935,6 +938,7 @@ list_del(&slabp->list); spin_unlock_irq(&cachep->spinlock); + conditional_schedule(); kmem_slab_destroy(cachep, slabp); ret++; spin_lock_irq(&cachep->spinlock); @@ -1851,6 +1855,7 @@ */ spin_unlock_irq(&best_cachep->spinlock); kmem_slab_destroy(best_cachep, slabp); + conditional_schedule(); /* try_to_free_pages() */ spin_lock_irq(&best_cachep->spinlock); } spin_unlock_irq(&best_cachep->spinlock); diff -Nur linux-2.4.33-imedia/mm/swapfile.c linux-2.4.33-imedia-patching/mm/swapfile.c --- linux-2.4.33-imedia/mm/swapfile.c 2005-04-04 04:42:20.000000000 +0300 +++ linux-2.4.33-imedia-patching/mm/swapfile.c 2006-01-26 15:19:43.000000000 +0200 @@ -834,7 +834,7 @@ len += sprintf(buf + len, "partition\t"); usedswap = 0; - for (j = 0; j < ptr->max; ++j) + for (j = 0; j < ptr->max; ++j) { switch (ptr->swap_map[j]) { case SWAP_MAP_BAD: case 0: @@ -842,6 +842,8 @@ default: usedswap++; } + conditional_schedule(); + } len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), usedswap << (PAGE_SHIFT - 10), ptr->prio); } @@ -1140,6 +1142,11 @@ if (swap_info[i].flags != SWP_USED) continue; for (j = 0; j < swap_info[i].max; ++j) { + if (conditional_schedule_needed()) { + swap_list_unlock(); + conditional_schedule(); + swap_list_lock(); + } switch (swap_info[i].swap_map[j]) { case 0: case SWAP_MAP_BAD: diff -Nur linux-2.4.33-imedia/mm/vmscan.c linux-2.4.33-imedia-patching/mm/vmscan.c --- linux-2.4.33-imedia/mm/vmscan.c 2005-11-16 21:12:54.000000000 +0200 +++ linux-2.4.33-imedia-patching/mm/vmscan.c 2006-01-26 15:19:43.000000000 +0200 @@ -210,6 +210,7 @@ { pte_t * pte; unsigned long pmd_end; + DEFINE_RESCHED_COUNT; if (pmd_none(*dir)) return count; @@ -235,11 +236,17 @@ address += PAGE_SIZE; break; } + if (TEST_RESCHED_COUNT(4)) { + if (conditional_schedule_needed()) + goto out; + RESET_RESCHED_COUNT(); + } } } address += PAGE_SIZE; pte++; } while (address && (address < end)); +out: mm->swap_address = address; return count; } @@ -268,6 +275,8 @@ count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone); if (!count) break; + if (conditional_schedule_needed()) + return count; address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -292,6 +301,8 @@ count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone); if (!count) break; + if (conditional_schedule_needed()) + return count; address = (address + PGDIR_SIZE) & PGDIR_MASK; pgdir++; } while (address && (address < end)); @@ -313,6 +324,7 @@ * Find the proper vm-area after freezing the vma chain * and ptes. 
*/
+continue_scan:
     spin_lock(&mm->page_table_lock);
     address = mm->swap_address;
     if (address == TASK_SIZE || swap_mm != mm) {
@@ -330,6 +342,12 @@
             vma = vma->vm_next;
             if (!vma)
                 break;
+            if (conditional_schedule_needed()) {    /* Scanning a large vma */
+                spin_unlock(&mm->page_table_lock);
+                unconditional_schedule();
+                /* Continue from where we left off */
+                goto continue_scan;
+            }
             if (!count)
                 goto out_unlock;
             address = vma->vm_start;
diff -Nur linux-2.4.33-imedia/net/bluetooth/bnep/core.c linux-2.4.33-imedia-patching/net/bluetooth/bnep/core.c
--- linux-2.4.33-imedia/net/bluetooth/bnep/core.c 2004-08-08 02:26:06.000000000 +0300
+++ linux-2.4.33-imedia-patching/net/bluetooth/bnep/core.c 2006-01-26 15:19:43.000000000 +0200
@@ -479,7 +479,7 @@
     sigfillset(&current->blocked);
     flush_signals(current);
-    current->nice = -15;
+    set_user_nice(current, -15);
     set_fs(KERNEL_DS);
diff -Nur linux-2.4.33-imedia/net/bluetooth/cmtp/core.c linux-2.4.33-imedia-patching/net/bluetooth/cmtp/core.c
--- linux-2.4.33-imedia/net/bluetooth/cmtp/core.c 2003-08-25 14:44:44.000000000 +0300
+++ linux-2.4.33-imedia-patching/net/bluetooth/cmtp/core.c 2006-01-26 15:19:43.000000000 +0200
@@ -298,7 +298,7 @@
     sigfillset(&current->blocked);
     flush_signals(current);
-    current->nice = -15;
+    set_user_nice(current, -15);
     set_fs(KERNEL_DS);
diff -Nur linux-2.4.33-imedia/net/core/dev.c linux-2.4.33-imedia-patching/net/core/dev.c
--- linux-2.4.33-imedia/net/core/dev.c 2005-04-04 04:42:20.000000000 +0300
+++ linux-2.4.33-imedia-patching/net/core/dev.c 2006-01-26 15:19:43.000000000 +0200
@@ -1093,9 +1093,15 @@
         int cpu = smp_processor_id();
         if (dev->xmit_lock_owner != cpu) {
+            /*
+             * The spin_lock effectively does a preempt lock, but
+             * we are about to drop that...
+             */
+            preempt_disable();
             spin_unlock(&dev->queue_lock);
             spin_lock(&dev->xmit_lock);
             dev->xmit_lock_owner = cpu;
+            preempt_enable();
             if (!netif_queue_stopped(dev)) {
                 if (netdev_nit)
@@ -1274,7 +1280,7 @@
 int netif_rx(struct sk_buff *skb)
 {
-    int this_cpu = smp_processor_id();
+    int this_cpu;
     struct softnet_data *queue;
     unsigned long flags;
@@ -1284,9 +1290,10 @@
     /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating.
*/ - queue = &softnet_data[this_cpu]; local_irq_save(flags); + this_cpu = smp_processor_id(); + queue = &softnet_data[this_cpu]; netdev_rx_stat[this_cpu].total++; if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { diff -Nur linux-2.4.33-imedia/net/core/iovec.c linux-2.4.33-imedia-patching/net/core/iovec.c --- linux-2.4.33-imedia/net/core/iovec.c 2001-09-10 17:57:00.000000000 +0300 +++ linux-2.4.33-imedia-patching/net/core/iovec.c 2006-01-26 15:19:43.000000000 +0200 @@ -88,7 +88,7 @@ if(iov->iov_len) { int copy = min_t(unsigned int, iov->iov_len, len); - if (copy_to_user(iov->iov_base, kdata, copy)) + if (ll_copy_to_user(iov->iov_base, kdata, copy)) goto out; kdata+=copy; len-=copy; diff -Nur linux-2.4.33-imedia/net/core/skbuff.c linux-2.4.33-imedia-patching/net/core/skbuff.c --- linux-2.4.33-imedia/net/core/skbuff.c 2003-08-25 14:44:44.000000000 +0300 +++ linux-2.4.33-imedia-patching/net/core/skbuff.c 2006-01-26 15:19:43.000000000 +0200 @@ -111,33 +111,37 @@ static __inline__ struct sk_buff *skb_head_from_pool(void) { - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + struct sk_buff_head *list; + struct sk_buff *skb = NULL; + unsigned long flags; - if (skb_queue_len(list)) { - struct sk_buff *skb; - unsigned long flags; + local_irq_save(flags); - local_irq_save(flags); + list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) skb = __skb_dequeue(list); - local_irq_restore(flags); - return skb; - } - return NULL; + + local_irq_restore(flags); + return skb; } static __inline__ void skb_head_to_pool(struct sk_buff *skb) { - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + struct sk_buff_head *list; + unsigned long flags; - if (skb_queue_len(list) < sysctl_hot_list_len) { - unsigned long flags; + local_irq_save(flags); + list = &skb_head_pool[smp_processor_id()].list; - local_irq_save(flags); + if (skb_queue_len(list) < sysctl_hot_list_len) { __skb_queue_head(list, skb); local_irq_restore(flags); return; } + + local_irq_restore(flags); kmem_cache_free(skbuff_head_cache, skb); } diff -Nur linux-2.4.33-imedia/net/ipv4/tcp_minisocks.c linux-2.4.33-imedia-patching/net/ipv4/tcp_minisocks.c --- linux-2.4.33-imedia/net/ipv4/tcp_minisocks.c 2004-11-17 13:54:22.000000000 +0200 +++ linux-2.4.33-imedia-patching/net/ipv4/tcp_minisocks.c 2006-01-26 15:19:43.000000000 +0200 @@ -433,6 +433,9 @@ { struct tcp_tw_bucket *tw; int killed = 0; +#if LOWLATENCY_NEEDED + int max_killed = 0; +#endif /* NOTE: compare this to previous version where lock * was released after detaching chain. It was racy, @@ -446,6 +449,13 @@ goto out; while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) { +#if LOWLATENCY_NEEDED + /* This loop takes ~6 usecs per iteration. 
*/ + if (killed > 100) { + max_killed = 1; + break; + } +#endif tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death; if (tw->next_death) tw->next_death->pprev_death = tw->pprev_death; @@ -458,12 +468,24 @@ killed++; spin_lock(&tw_death_lock); + + } + +#if LOWLATENCY_NEEDED + if (max_killed) { /* More to do: do it soon */ + mod_timer(&tcp_tw_timer, jiffies+2); + tcp_tw_count -= killed; + } + else +#endif + { + tcp_tw_death_row_slot = + ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); + + if ((tcp_tw_count -= killed) != 0) + mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); } - tcp_tw_death_row_slot = - ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); - if ((tcp_tw_count -= killed) != 0) - mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); net_statistics[smp_processor_id()*2].TimeWaited += killed; out: spin_unlock(&tw_death_lock); diff -Nur linux-2.4.33-imedia/net/socket.c linux-2.4.33-imedia-patching/net/socket.c --- linux-2.4.33-imedia/net/socket.c 2005-01-19 16:10:14.000000000 +0200 +++ linux-2.4.33-imedia-patching/net/socket.c 2006-01-26 15:19:43.000000000 +0200 @@ -132,7 +132,7 @@ static struct net_proto_family *net_families[NPROTO]; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t net_family_lockct = ATOMIC_INIT(0); static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED; diff -Nur linux-2.4.33-imedia/net/sunrpc/pmap_clnt.c linux-2.4.33-imedia-patching/net/sunrpc/pmap_clnt.c --- linux-2.4.33-imedia/net/sunrpc/pmap_clnt.c 2002-08-03 03:39:46.000000000 +0300 +++ linux-2.4.33-imedia-patching/net/sunrpc/pmap_clnt.c 2006-01-26 15:19:43.000000000 +0200 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include
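For reference, the lock-break idiom that the mm/ and net/ hunks above keep applying
can be distilled as follows. This is an illustrative sketch only, not code from the
patch: the lock, list, item and function names (my_lock, my_list, struct my_item,
scan_items) are made up for the example, and only conditional_schedule_needed() and
unconditional_schedule() come from the low-latency patch itself.

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/sched.h>

	static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;
	static LIST_HEAD(my_list);

	struct my_item {
		struct list_head list;
		int dirty;
	};

	static void scan_items(void)
	{
		struct list_head *curr;
		int restarts = 100;	/* bound the lock breaks, like ll_count above */

	restart:
		spin_lock(&my_lock);
		list_for_each(curr, &my_list) {
			struct my_item *item = list_entry(curr, struct my_item, list);

			if (conditional_schedule_needed() && restarts) {
				/*
				 * A higher priority task wants the CPU: drop the
				 * lock, schedule, then rescan from the head since
				 * the list may have changed while we slept.
				 */
				restarts--;
				spin_unlock(&my_lock);
				unconditional_schedule();
				goto restart;
			}
			item->dirty = 0;
		}
		spin_unlock(&my_lock);
	}

The restart counter matters: without it, a scan of a list that other CPUs keep
refilling could break the lock and restart forever, which is why invalidate_inode_pages()
and truncate_list_pages() above cap the number of restarts as well.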