82 files changed, 1878 insertions, 303 deletions
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
new file mode 100644
index 000000000000..13e4bf054c38
--- /dev/null
+++ b/Documentation/ftrace.txt
@@ -0,0 +1,1353 @@
+		ftrace - Function Tracer
+		========================
+
+Copyright 2008 Red Hat Inc.
+Author: Steven Rostedt <srostedt@redhat.com>
+
+
+Introduction
+------------
+
+Ftrace is an internal tracer designed to help out developers and
+designers of systems to find what is going on inside the kernel.
+It can be used for debugging or analyzing latencies and performance
+issues that take place outside of user-space.
+
+Although ftrace is the function tracer, it also includes an
+infrastructure that allows for other types of tracing. Some of the
+tracers that are currently in ftrace include a tracer to trace
+context switches, the time it takes for a high priority task to
+run after it was woken up, the time interrupts are disabled, and
+more.
+
+
+The File System
+---------------
+
+Ftrace uses the debugfs file system to hold the control files as well
+as the files to display output.
+
+To mount the debugfs system:
+
+  # mkdir /debug
+  # mount -t debugfs nodev /debug
+
+
+That's it! (assuming that you have ftrace configured into your kernel)
+
+After mounting the debugfs, you can see a directory called
+"tracing".  This directory contains the control and output files
+of ftrace. Here is a list of some of the key files:
+
+
+ Note: all time values are in microseconds.
+
+  current_tracer : This is used to set or display the current tracer
+		that is configured.
+
+  available_tracers : This holds the different types of tracers that
+		have been compiled into the kernel. The tracers
+		listed here can be configured by echoing in their
+		name into current_tracer.
+
+  tracing_enabled : This sets or displays whether the current_tracer
+		is activated and tracing or not. Echo 0 into this
+		file to disable the tracer or 1 (or non-zero) to
+		enable it.
+
+  trace : This file holds the output of the trace in a human readable
+		format.
+
+  latency_trace : This file shows the same trace but the information
+		is organized more to display possible latencies
+		in the system.
+
+  trace_pipe : The output is the same as the "trace" file but this
+		file is meant to be streamed with live tracing.
+		Reads from this file will block until new data
+		is retrieved. Unlike the "trace" and "latency_trace"
+		files, this file is a consumer. This means reading
+		from this file causes sequential reads to display
+		more current data. Once data is read from this
+		file, it is consumed, and will not be read
+		again with a sequential read. The "trace" and
+		"latency_trace" files are static, and if the
+		tracer isn't adding more data, they will display
+		the same information every time they are read.
+
+  iter_ctrl : This file lets the user control the amount of data
+		that is displayed in one of the above output
+		files.
+
+  tracing_max_latency : Some of the tracers record the max latency.
+		For example, the time interrupts are disabled.
+		This time is saved in this file. The max trace
+		will also be stored, and displayed by either
+		"trace" or "latency_trace".  A new max trace will
+		only be recorded if the latency is greater than
+		the value in this file. (in microseconds)
+
+  trace_entries : This sets or displays the number of trace
+		entries each CPU buffer can hold. The tracer buffers
+		are the same size for each CPU, so care must be
+		taken when modifying the trace_entries. The number
+		of actual entries will be the number given
+		times the number of possible CPUS. The buffers
+		are saved as individual pages, and the actual entries
+		will always be rounded up to entries per page.
+
+		This can only be updated when the current_tracer
+		is set to "none".
+
+		NOTE: It is planned on changing the allocated buffers
+		      from being the number of possible CPUS to
+		      the number of online CPUS.
+
+  tracing_cpumask : This is a mask that lets the user only trace
+		on specified CPUS. The format is a hex string
+		representing the CPUS.
+
+  set_ftrace_filter : When dynamic ftrace is configured in, the
+		code is dynamically modified to disable calling
+		of the function profiler (mcount). This lets
+		tracing be configured in with practically no overhead
+		in performance.  This also has a side effect of
+		enabling or disabling specific functions to be
+		traced.  Echoing in names of functions into this
+		file will limit the trace to only those functions.
+
+  set_ftrace_notrace: This has the opposite effect that
+		set_ftrace_filter has. Any function that is added
+		here will not be traced. If a function exists
+		in both set_ftrace_filter and set_ftrace_notrace
+		the function will _not_ be traced.
+
+  available_filter_functions : When a function is encountered the first
+		time by the dynamic tracer, it is recorded and
+		later the call is converted into a nop. This file
+		lists the functions that have been recorded
+		by the dynamic tracer and these functions can
+		be used to set the ftrace filter by the above
+		"set_ftrace_filter" file.
+
+
+The Tracers
+-----------
+
+Here is the list of current tracers that can be configured.
+
+  ftrace - function tracer that uses mcount to trace all functions.
+		It is possible to filter which functions are
+		traced when dynamic ftrace is configured in.
+
+  sched_switch - traces the context switches between tasks.
+
+  irqsoff - traces the areas that disable interrupts and saves off
+  		the trace with the longest max latency.
+		See tracing_max_latency.  When a new max is recorded,
+		it replaces the old trace. It is best to view this
+		trace with the latency_trace file.
+
+  preemptoff - Similar to irqsoff but traces and records the time
+		preemption is disabled.
+
+  preemptirqsoff - Similar to irqsoff and preemptoff, but traces and
+		 records the largest time irqs and/or preemption is
+		 disabled.
+
+  wakeup - Traces and records the max latency that it takes for
+		the highest priority task to get scheduled after
+		it has been woken up.
+
+  none - This is not a tracer. To remove all tracers from tracing
+		simply echo "none" into current_tracer.
+
+
+Examples of using the tracer
+----------------------------
+
+Here are typical examples of using the tracers when controlling them
+only with the debugfs interface (without using any user-land utilities).
+
+Output format:
+--------------
+
+Here's an example of the output format of the file "trace"
+
+                             --------
+# tracer: ftrace
+#
+#           TASK-PID   CPU#    TIMESTAMP  FUNCTION
+#              | |      |          |         |
+            bash-4251  [01] 10152.583854: path_put <-path_walk
+            bash-4251  [01] 10152.583855: dput <-path_put
+            bash-4251  [01] 10152.583855: _atomic_dec_and_lock <-dput
+                             --------
+
+A header is printed with the trace that is represented. In this case
+the tracer is "ftrace". Then a header showing the format. Task name
+"bash", the task PID "4251", the CPU that it was running on
+"01", the timestamp in <secs>.<usecs> format, the function name that was
+traced "path_put" and the parent function that called this function
+"path_walk".
+
+The sched_switch tracer also includes tracing of task wake ups and
+context switches.
+
+     ksoftirqd/1-7     [01]  1453.070013:      7:115:R   +  2916:115:S
+     ksoftirqd/1-7     [01]  1453.070013:      7:115:R   +    10:115:S
+     ksoftirqd/1-7     [01]  1453.070013:      7:115:R ==>    10:115:R
+        events/1-10    [01]  1453.070013:     10:115:S ==>  2916:115:R
+     kondemand/1-2916  [01]  1453.070013:   2916:115:S ==>     7:115:R
+     ksoftirqd/1-7     [01]  1453.070013:      7:115:S ==>     0:140:R
+
+Wake ups are represented by a "+" and the context switches show
+"==>".  The format is:
+
+ Context switches:
+
+       Previous task              Next Task
+
+  <pid>:<prio>:<state>  ==>  <pid>:<prio>:<state>
+
+ Wake ups:
+
+       Current task               Task waking up
+
+  <pid>:<prio>:<state>    +  <pid>:<prio>:<state>
+
+The prio is the internal kernel priority, which is inverse to the
+priority that is usually displayed by user-space tools. Zero represents
+the highest priority (99). Prio 100 starts the "nice" priorities with
+100 being equal to nice -20 and 139 being nice 19. The prio "140" is
+reserved for the idle task which is the lowest priority thread (pid 0).
+
+
+Latency trace format
+--------------------
+
+For traces that display latency times, the latency_trace file gives
+a bit more information to see why a latency happened. Here's a typical
+trace.
+
+# tracer: irqsoff
+#
+irqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
+    -----------------
+ => started at: apic_timer_interrupt
+ => ended at:   do_softirq
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+  <idle>-0     0d..1    0us+: trace_hardirqs_off_thunk (apic_timer_interrupt)
+  <idle>-0     0d.s.   97us : __do_softirq (do_softirq)
+  <idle>-0     0d.s1   98us : trace_hardirqs_on (do_softirq)
+
+
+vim:ft=help
+
+
+This shows that the current tracer is "irqsoff" tracing the time
+interrupts are disabled. It gives the trace version and the kernel
+this was executed on (2.6.26-rc8). Then it displays the max latency
+in microsecs (97 us). The number of trace entries displayed
+and the total number recorded (both are three: #3/3). The type of
+preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero
+and reserved for later use. #P is the number of online CPUS (#P:2).
+
+The task is the process that was running when the latency happened.
+(swapper pid: 0).
+
+The start and stop that caused the latencies:
+
+  apic_timer_interrupt is where the interrupts were disabled.
+  do_softirq is where they were enabled again.
+
+The next lines after the header are the trace itself. The header
+explains which is which.
+
+  cmd: The name of the process in the trace.
+
+  pid: The PID of that process.
+
+  CPU#: The CPU that the process was running on.
+
+  irqs-off: 'd' interrupts are disabled. '.' otherwise.
+
+  need-resched: 'N' task need_resched is set, '.' otherwise.
+
+  hardirq/softirq:
+	'H' - hard irq happened inside a softirq.
+	'h' - hard irq is running
+	's' - soft irq is running
+	'.' - normal context.
+
+  preempt-depth: The level of preempt_disabled
+
+The above is mostly meaningful for kernel developers.
+
+  time: This differs from the trace file output in that the trace file
+	contains an absolute timestamp. This timestamp is relative
+	to the start of the first entry in the trace.
+
+  delay: This is just to help catch your eye a bit better. And
+	needs to be fixed to be only relative to the same CPU.
+	The mark is determined by the difference between this
+	current trace and the next trace.
+	 '!' - greater than preempt_mark_thresh (default 100)
+	 '+' - greater than 1 microsecond
+	 ' ' - less than or equal to 1 microsecond.
+
+  The rest is the same as the 'trace' file.
+
+
+iter_ctrl
+---------
+
+The iter_ctrl file is used to control what gets printed in the trace
+output. To see what is available, simply cat the file:
+
+  cat /debug/tracing/iter_ctrl
+  print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
+ noblock nostacktrace nosched-tree
+
+To disable one of the options, echo in the option prepended with "no".
+
+  echo noprint-parent > /debug/tracing/iter_ctrl
+
+To enable an option, leave off the "no".
+
+  echo sym-offset > /debug/tracing/iter_ctrl
+
+Here are the available options:
+
+  print-parent - On function traces, display the calling function
+		as well as the function being traced.
+
+  print-parent:
+   bash-4000  [01]  1477.606694: simple_strtoul <-strict_strtoul
+
+  noprint-parent:
+   bash-4000  [01]  1477.606694: simple_strtoul
+
+
+  sym-offset - Display not only the function name, but also the offset
+		in the function. For example, instead of seeing just
+		"ktime_get" you will see "ktime_get+0xb/0x20"
+
+  sym-offset:
+   bash-4000  [01]  1477.606694: simple_strtoul+0x6/0xa0
+
+  sym-addr - this will also display the function address as well as
+		the function name.
+
+  sym-addr:
+   bash-4000  [01]  1477.606694: simple_strtoul <c0339346>
+
+  verbose - This deals with the latency_trace file.
+
+    bash  4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \
+    (+0.000ms): simple_strtoul (strict_strtoul)
+
+  raw - This will display raw numbers. This option is best for use with
+	user applications that can translate the raw numbers better than
+	having it done in the kernel.
+
+  hex - similar to raw, but the numbers will be in a hexadecimal format.
+
+  bin - This will print out the formats in raw binary.
+
+  block - TBD (needs update)
+
+  stacktrace - This is one of the options that changes the trace itself.
+		When a trace is recorded, so is the stack of functions.
+		This allows for back traces of trace sites.
+
+  sched-tree - TBD (any users??)
+
+
+sched_switch
+------------
+
+This tracer simply records schedule switches. Here's an example
+of how to use it.
+
+ # echo sched_switch > /debug/tracing/current_tracer
+ # echo 1 > /debug/tracing/tracing_enabled
+ # sleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+
+# tracer: sched_switch
+#
+#           TASK-PID   CPU#    TIMESTAMP  FUNCTION
+#              | |      |          |         |
+            bash-3997  [01]   240.132281:   3997:120:R   +  4055:120:R
+            bash-3997  [01]   240.132284:   3997:120:R ==>  4055:120:R
+           sleep-4055  [01]   240.132371:   4055:120:S ==>  3997:120:R
+            bash-3997  [01]   240.132454:   3997:120:R   +  4055:120:S
+            bash-3997  [01]   240.132457:   3997:120:R ==>  4055:120:R
+           sleep-4055  [01]   240.132460:   4055:120:D ==>  3997:120:R
+            bash-3997  [01]   240.132463:   3997:120:R   +  4055:120:D
+            bash-3997  [01]   240.132465:   3997:120:R ==>  4055:120:R
+          <idle>-0     [00]   240.132589:      0:140:R   +     4:115:S
+          <idle>-0     [00]   240.132591:      0:140:R ==>     4:115:R
+     ksoftirqd/0-4     [00]   240.132595:      4:115:S ==>     0:140:R
+          <idle>-0     [00]   240.132598:      0:140:R   +     4:115:S
+          <idle>-0     [00]   240.132599:      0:140:R ==>     4:115:R
+     ksoftirqd/0-4     [00]   240.132603:      4:115:S ==>     0:140:R
+           sleep-4055  [01]   240.133058:   4055:120:S ==>  3997:120:R
+ [...]
+
+
+As we have discussed previously about this format, the header shows
+the name of the trace and points to the options. The "FUNCTION"
+is a misnomer since here it represents the wake ups and context
+switches.
+
+The sched_switch only lists the wake ups (represented with '+')
+and context switches ('==>') with the previous task or current
+first followed by the next task or task waking up. The format for both
+of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO
+is the inverse of the actual priority with zero (0) being the highest
+priority and the nice values starting at 100 (nice -20). Below is
+a quick chart to map the kernel priority to user land priorities.
+
+  Kernel priority: 0 to 99    ==> user RT priority 99 to 0
+  Kernel priority: 100 to 139 ==> user nice -20 to 19
+  Kernel priority: 140        ==> idle task priority
+
+The task states are:
+
+ R - running : wants to run, may not actually be running
+ S - sleep   : process is waiting to be woken up (handles signals)
+ D - deep sleep : process must be woken up (ignores signals)
+ T - stopped : process suspended
+ t - traced  : process is being traced (with something like gdb)
+ Z - zombie  : process waiting to be cleaned up
+ X - unknown
+
+
+ftrace_enabled
+--------------
+
+The following tracers give different output depending on whether
+or not the sysctl ftrace_enabled is set. To set ftrace_enabled,
+one can either use the sysctl function or set it via the proc
+file system interface.
+
+  sysctl kernel.ftrace_enabled=1
+
+ or
+
+  echo 1 > /proc/sys/kernel/ftrace_enabled
+
+To disable ftrace_enabled simply replace the '1' with '0' in
+the above commands.
+
+When ftrace_enabled is set the tracers will also record the functions
+that are within the trace. The descriptions of the tracers
+will also show an example with ftrace enabled.
+
+
+irqsoff
+-------
+
+When interrupts are disabled, the CPU can not react to any other
+external event (besides NMIs and SMIs). This prevents the timer
+interrupt from triggering or the mouse interrupt from letting the
+kernel know of a new mouse event. The result is a latency with the
+reaction time.
+
+The irqsoff tracer tracks the time interrupts are disabled and when
+they are re-enabled. When a new maximum latency is hit, it saves off
+the trace so that it may be retrieved at a later time. Every time a
+new maximum is reached, the old saved trace is discarded and the new
+trace is saved.
+
+To reset the maximum, echo 0 into tracing_max_latency. Here's an
+example:
+
+ # echo irqsoff > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # ls -ltr
+ [...]
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: irqsoff
+#
+irqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 6 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: bash-4269 (uid:0 nice:0 policy:0 rt_prio:0)
+    -----------------
+ => started at: copy_page_range
+ => ended at:   copy_page_range
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+    bash-4269  1...1    0us+: _spin_lock (copy_page_range)
+    bash-4269  1...1    7us : _spin_unlock (copy_page_range)
+    bash-4269  1...2    7us : trace_preempt_on (copy_page_range)
+
+
+vim:ft=help
+
+Here we see that we had a latency of 6 microsecs (which is
+very good). The spin_lock in copy_page_range disabled interrupts.
+The difference between the 6 and the displayed timestamp 7us is
+because the clock must have incremented between the time of recording
+the max latency and recording the function that had that latency.
+
+Note the above had ftrace_enabled not set. If we set the ftrace_enabled
+we get a much larger output:
+
+# tracer: irqsoff
+#
+irqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0)
+    -----------------
+ => started at: __alloc_pages_internal
+ => ended at:   __alloc_pages_internal
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+      ls-4339  0...1    0us+: get_page_from_freelist (__alloc_pages_internal)
+      ls-4339  0d..1    3us : rmqueue_bulk (get_page_from_freelist)
+      ls-4339  0d..1    3us : _spin_lock (rmqueue_bulk)
+      ls-4339  0d..1    4us : add_preempt_count (_spin_lock)
+      ls-4339  0d..2    4us : __rmqueue (rmqueue_bulk)
+      ls-4339  0d..2    5us : __rmqueue_smallest (__rmqueue)
+      ls-4339  0d..2    5us : __mod_zone_page_state (__rmqueue_smallest)
+      ls-4339  0d..2    6us : __rmqueue (rmqueue_bulk)
+      ls-4339  0d..2    6us : __rmqueue_smallest (__rmqueue)
+      ls-4339  0d..2    7us : __mod_zone_page_state (__rmqueue_smallest)
+      ls-4339  0d..2    7us : __rmqueue (rmqueue_bulk)
+      ls-4339  0d..2    8us : __rmqueue_smallest (__rmqueue)
+[...]
+      ls-4339  0d..2   46us : __rmqueue_smallest (__rmqueue)
+      ls-4339  0d..2   47us : __mod_zone_page_state (__rmqueue_smallest)
+      ls-4339  0d..2   47us : __rmqueue (rmqueue_bulk)
+      ls-4339  0d..2   48us : __rmqueue_smallest (__rmqueue)
+      ls-4339  0d..2   48us : __mod_zone_page_state (__rmqueue_smallest)
+      ls-4339  0d..2   49us : _spin_unlock (rmqueue_bulk)
+      ls-4339  0d..2   49us : sub_preempt_count (_spin_unlock)
+      ls-4339  0d..1   50us : get_page_from_freelist (__alloc_pages_internal)
+      ls-4339  0d..2   51us : trace_hardirqs_on (__alloc_pages_internal)
+
+
+vim:ft=help
+
+
+Here we traced a 50 microsecond latency. But we also see all the
+functions that were called during that time. Note that by enabling
+function tracing, we incur an added overhead. This overhead may
+extend the latency times. But nevertheless, this trace has provided
+some very helpful debugging information.
+
+
+preemptoff
+----------
+
+When preemption is disabled we may be able to receive interrupts but
+the task can not be preempted and a higher priority task must wait
+for preemption to be enabled again before it can preempt a lower
+priority task.
+
+The preemptoff tracer traces the places that disable preemption.
+Like the irqsoff, it records the maximum latency that preemption
+was disabled. The control of preemptoff is much like the irqsoff.
+
+ # echo preemptoff > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # ls -ltr
+ [...]
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: preemptoff
+#
+preemptoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
+    -----------------
+ => started at: do_IRQ
+ => ended at:   __do_softirq
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+    sshd-4261  0d.h.    0us+: irq_enter (do_IRQ)
+    sshd-4261  0d.s.   29us : _local_bh_enable (__do_softirq)
+    sshd-4261  0d.s1   30us : trace_preempt_on (__do_softirq)
+
+
+vim:ft=help
+
+This has some more changes. Preemption was disabled when an interrupt
+came in (notice the 'h'), and was enabled while doing a softirq.
+(notice the 's'). But we also see that interrupts have been disabled
+when entering the preempt off section and leaving it (the 'd').
+We do not know if interrupts were enabled in the mean time.
+
+# tracer: preemptoff
+#
+preemptoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
+    -----------------
+ => started at: remove_wait_queue
+ => ended at:   __do_softirq
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+    sshd-4261  0d..1    0us : _spin_lock_irqsave (remove_wait_queue)
+    sshd-4261  0d..1    1us : _spin_unlock_irqrestore (remove_wait_queue)
+    sshd-4261  0d..1    2us : do_IRQ (common_interrupt)
+    sshd-4261  0d..1    2us : irq_enter (do_IRQ)
+    sshd-4261  0d..1    2us : idle_cpu (irq_enter)
+    sshd-4261  0d..1    3us : add_preempt_count (irq_enter)
+    sshd-4261  0d.h1    3us : idle_cpu (irq_enter)
+    sshd-4261  0d.h.    4us : handle_fasteoi_irq (do_IRQ)
+[...]
+    sshd-4261  0d.h.   12us : add_preempt_count (_spin_lock)
+    sshd-4261  0d.h1   12us : ack_ioapic_quirk_irq (handle_fasteoi_irq)
+    sshd-4261  0d.h1   13us : move_native_irq (ack_ioapic_quirk_irq)
+    sshd-4261  0d.h1   13us : _spin_unlock (handle_fasteoi_irq)
+    sshd-4261  0d.h1   14us : sub_preempt_count (_spin_unlock)
+    sshd-4261  0d.h1   14us : irq_exit (do_IRQ)
+    sshd-4261  0d.h1   15us : sub_preempt_count (irq_exit)
+    sshd-4261  0d..2   15us : do_softirq (irq_exit)
+    sshd-4261  0d...   15us : __do_softirq (do_softirq)
+    sshd-4261  0d...   16us : __local_bh_disable (__do_softirq)
+    sshd-4261  0d...   16us+: add_preempt_count (__local_bh_disable)
+    sshd-4261  0d.s4   20us : add_preempt_count (__local_bh_disable)
+    sshd-4261  0d.s4   21us : sub_preempt_count (local_bh_enable)
+    sshd-4261  0d.s5   21us : sub_preempt_count (local_bh_enable)
+[...]
+    sshd-4261  0d.s6   41us : add_preempt_count (__local_bh_disable)
+    sshd-4261  0d.s6   42us : sub_preempt_count (local_bh_enable)
+    sshd-4261  0d.s7   42us : sub_preempt_count (local_bh_enable)
+    sshd-4261  0d.s5   43us : add_preempt_count (__local_bh_disable)
+    sshd-4261  0d.s5   43us : sub_preempt_count (local_bh_enable_ip)
+    sshd-4261  0d.s6   44us : sub_preempt_count (local_bh_enable_ip)
+    sshd-4261  0d.s5   44us : add_preempt_count (__local_bh_disable)
+    sshd-4261  0d.s5   45us : sub_preempt_count (local_bh_enable)
+[...]
+    sshd-4261  0d.s.   63us : _local_bh_enable (__do_softirq)
+    sshd-4261  0d.s1   64us : trace_preempt_on (__do_softirq)
+
+
+The above is an example of the preemptoff trace with ftrace_enabled
+set. Here we see that interrupts were disabled the entire time.
+The irq_enter code lets us know that we entered an interrupt 'h'.
+Before that, the functions being traced still show that it is not
+in an interrupt, but we can see by the functions themselves that
+this is not the case.
+
+Notice that the __do_softirq when called doesn't have a preempt_count.
+It may seem that we missed a preempt enabled. What really happened
+is that the preempt count is held on the thread's stack and we
+switched to the softirq stack (4K stacks in effect). The code
+does not copy the preempt count, but because interrupts are disabled
+we don't need to worry about it. Having a tracer like this is good
+to let people know what really happens inside the kernel.
+
+
+preemptirqsoff
+--------------
+
+Knowing the locations that have interrupts disabled or preemption
+disabled for the longest times is helpful. But sometimes we would
+like to know when either preemption and/or interrupts are disabled.
+
+The following code:
+
+    local_irq_disable();
+    call_function_with_irqs_off();
+    preempt_disable();
+    call_function_with_irqs_and_preemption_off();
+    local_irq_enable();
+    call_function_with_preemption_off();
+    preempt_enable();
+
+The irqsoff tracer will record the total length of
+call_function_with_irqs_off() and
+call_function_with_irqs_and_preemption_off().
+
+The preemptoff tracer will record the total length of
+call_function_with_irqs_and_preemption_off() and
+call_function_with_preemption_off().
+
+But neither will trace the time that interrupts and/or preemption
+is disabled. This total time is the time that we can not schedule.
+To record this time, use the preemptirqsoff tracer.
+
+Again, using this trace is much like the irqsoff and preemptoff tracers.
+
+ # echo preemptirqsoff > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # ls -ltr
+ [...]
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: preemptirqsoff
+#
+preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0)
+    -----------------
+ => started at: apic_timer_interrupt
+ => ended at:   __do_softirq
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+      ls-4860  0d...    0us!: trace_hardirqs_off_thunk (apic_timer_interrupt)
+      ls-4860  0d.s.  294us : _local_bh_enable (__do_softirq)
+      ls-4860  0d.s1  294us : trace_preempt_on (__do_softirq)
+
+
+vim:ft=help
+
+
+The trace_hardirqs_off_thunk is called from assembly on x86 when
+interrupts are disabled in the assembly code. Without the function
+tracing, we don't know if interrupts were enabled within the preemption
+points. We do see that it started with preemption enabled.
+
+Here is a trace with ftrace_enabled set:
+
+
+# tracer: preemptirqsoff
+#
+preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
+    -----------------
+ => started at: write_chan
+ => ended at:   __do_softirq
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+      ls-4473  0.N..    0us : preempt_schedule (write_chan)
+      ls-4473  0dN.1    1us : _spin_lock (schedule)
+      ls-4473  0dN.1    2us : add_preempt_count (_spin_lock)
+      ls-4473  0d..2    2us : put_prev_task_fair (schedule)
+[...]
+      ls-4473  0d..2   13us : set_normalized_timespec (ktime_get_ts)
+      ls-4473  0d..2   13us : __switch_to (schedule)
+    sshd-4261  0d..2   14us : finish_task_switch (schedule)
+    sshd-4261  0d..2   14us : _spin_unlock_irq (finish_task_switch)
+    sshd-4261  0d..1   15us : add_preempt_count (_spin_lock_irqsave)
+    sshd-4261  0d..2   16us : _spin_unlock_irqrestore (hrtick_set)
+    sshd-4261  0d..2   16us : do_IRQ (common_interrupt)
+    sshd-4261  0d..2   17us : irq_enter (do_IRQ)
+    sshd-4261  0d..2   17us : idle_cpu (irq_enter)
+    sshd-4261  0d..2   18us : add_preempt_count (irq_enter)
+    sshd-4261  0d.h2   18us : idle_cpu (irq_enter)
+    sshd-4261  0d.h.   18us : handle_fasteoi_irq (do_IRQ)
+    sshd-4261  0d.h.   19us : _spin_lock (handle_fasteoi_irq)
+    sshd-4261  0d.h.   19us : add_preempt_count (_spin_lock)
+    sshd-4261  0d.h1   20us : _spin_unlock (handle_fasteoi_irq)
+    sshd-4261  0d.h1   20us : sub_preempt_count (_spin_unlock)
+[...]
+    sshd-4261  0d.h1   28us : _spin_unlock (handle_fasteoi_irq)
+    sshd-4261  0d.h1   29us : sub_preempt_count (_spin_unlock)
+    sshd-4261  0d.h2   29us : irq_exit (do_IRQ)
+    sshd-4261  0d.h2   29us : sub_preempt_count (irq_exit)
+    sshd-4261  0d..3   30us : do_softirq (irq_exit)
+    sshd-4261  0d...   30us : __do_softirq (do_softirq)
+    sshd-4261  0d...   31us : __local_bh_disable (__do_softirq)
+    sshd-4261  0d...   31us+: add_preempt_count (__local_bh_disable)
+    sshd-4261  0d.s4   34us : add_preempt_count (__local_bh_disable)
+[...]
+    sshd-4261  0d.s3   43us : sub_preempt_count (local_bh_enable_ip)
+    sshd-4261  0d.s4   44us : sub_preempt_count (local_bh_enable_ip)
+    sshd-4261  0d.s3   44us : smp_apic_timer_interrupt (apic_timer_interrupt)
+    sshd-4261  0d.s3   45us : irq_enter (smp_apic_timer_interrupt)
+    sshd-4261  0d.s3   45us : idle_cpu (irq_enter)
+    sshd-4261  0d.s3   46us : add_preempt_count (irq_enter)
+    sshd-4261  0d.H3   46us : idle_cpu (irq_enter)
+    sshd-4261  0d.H3   47us : hrtimer_interrupt (smp_apic_timer_interrupt)
+    sshd-4261  0d.H3   47us : ktime_get (hrtimer_interrupt)
+[...]
+    sshd-4261  0d.H3   81us : tick_program_event (hrtimer_interrupt)
+    sshd-4261  0d.H3   82us : ktime_get (tick_program_event)
+    sshd-4261  0d.H3   82us : ktime_get_ts (ktime_get)
+    sshd-4261  0d.H3   83us : getnstimeofday (ktime_get_ts)
+    sshd-4261  0d.H3   83us : set_normalized_timespec (ktime_get_ts)
+    sshd-4261  0d.H3   84us : clockevents_program_event (tick_program_event)
+    sshd-4261  0d.H3   84us : lapic_next_event (clockevents_program_event)
+    sshd-4261  0d.H3   85us : irq_exit (smp_apic_timer_interrupt)
+    sshd-4261  0d.H3   85us : sub_preempt_count (irq_exit)
+    sshd-4261  0d.s4   86us : sub_preempt_count (irq_exit)
+    sshd-4261  0d.s3   86us : add_preempt_count (__local_bh_disable)
+[...]
+    sshd-4261  0d.s1   98us : sub_preempt_count (net_rx_action)
+    sshd-4261  0d.s.   99us : add_preempt_count (_spin_lock_irq)
+    sshd-4261  0d.s1   99us+: _spin_unlock_irq (run_timer_softirq)
+    sshd-4261  0d.s.  104us : _local_bh_enable (__do_softirq)
+    sshd-4261  0d.s.  104us : sub_preempt_count (_local_bh_enable)
+    sshd-4261  0d.s.  105us : _local_bh_enable (__do_softirq)
+    sshd-4261  0d.s1  105us : trace_preempt_on (__do_softirq)
+
+
+This is a very interesting trace. It started with the preemption of
+the ls task. We see that the task had the "need_resched" bit set
+with the 'N' in the trace.  Interrupts are disabled in the spin_lock
+and the trace started. We see that a schedule took place to run
+sshd.  When the interrupts were enabled we took an interrupt.
+On return of the interrupt the softirq ran. We took another interrupt
+while running the softirq as we see with the capital 'H'.
+
+
+wakeup
+------
+
+In a Real-Time environment it is very important to know the time it
+takes for the highest priority task to go from being woken up to
+actually executing. This is also known as "schedule latency".
+I stress the point that this is about RT tasks. It is also important
+to know the scheduling latency of non-RT tasks, but the average
+schedule latency is better for non-RT tasks. Tools like
+LatencyTop are more appropriate for such measurements.
+
+Real-Time environments are interested in the worst case latency.
+That is the longest latency it takes for something to happen, and
+not the average. We can have a very fast scheduler that may only
+have a large latency once in a while, but that would not work well
+with Real-Time tasks.  The wakeup tracer was designed to record
+the worst case wakeups of RT tasks. Non-RT tasks are not recorded
+because the tracer only records one worst case and tracing non-RT
+tasks that are unpredictable will overwrite the worst case latency
+of RT tasks.
+
+Since this tracer only deals with RT tasks, we will run this slightly
+differently than we did with the previous tracers. Instead of performing
+an 'ls' we will run 'sleep 1' under 'chrt' which changes the
+priority of the task.
+
+ # echo wakeup > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # chrt -f 5 sleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: wakeup
+#
+wakeup latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5)
+    -----------------
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+  <idle>-0     1d.h4    0us+: try_to_wake_up (wake_up_process)
+  <idle>-0     1d..4    4us : schedule (cpu_idle)
+
+
+vim:ft=help
+
+
+Running this on an idle system we see that it only took 4 microseconds
+to perform the task switch.  Note, since the trace marker in the
+schedule is before the actual "switch" we stop the tracing when
+the recorded task is about to schedule in. This may change if
+we add a new marker at the end of the scheduler.
+
+Notice that the recorded task is 'sleep' with the PID of 4901 and it
+has an rt_prio of 5. This priority is user-space priority and not
+the internal kernel priority. The policy is 1 for SCHED_FIFO and 2
+for SCHED_RR.
+
+Doing the same with chrt -r 5 and ftrace_enabled set.
+
+# tracer: wakeup
+#
+wakeup latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+    -----------------
+    | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5)
+    -----------------
+
+#                _------=> CPU#
+#               / _-----=> irqs-off
+#              | / _----=> need-resched
+#              || / _---=> hardirq/softirq
+#              ||| / _--=> preempt-depth
+#              |||| /
+#              |||||     delay
+#  cmd     pid ||||| time  |   caller
+#     \   /    |||||   \   |   /
+ksoftirq-7     1d.H3    0us : try_to_wake_up (wake_up_process)
+ksoftirq-7     1d.H4    1us : sub_preempt_count (marker_probe_cb)
+ksoftirq-7     1d.H3    2us : check_preempt_wakeup (try_to_wake_up)
+ksoftirq-7     1d.H3    3us : update_curr (check_preempt_wakeup)
+ksoftirq-7     1d.H3    4us : calc_delta_mine (update_curr)
+ksoftirq-7     1d.H3    5us : __resched_task (check_preempt_wakeup)
+ksoftirq-7     1d.H3    6us : task_wake_up_rt (try_to_wake_up)
+ksoftirq-7     1d.H3    7us : _spin_unlock_irqrestore (try_to_wake_up)
+[...]
+ksoftirq-7     1d.H2   17us : irq_exit (smp_apic_timer_interrupt)
+ksoftirq-7     1d.H2   18us : sub_preempt_count (irq_exit)
+ksoftirq-7     1d.s3   19us : sub_preempt_count (irq_exit)
+ksoftirq-7     1..s2   20us : rcu_process_callbacks (__do_softirq)
+[...]
+ksoftirq-7     1..s2   26us : __rcu_process_callbacks (rcu_process_callbacks)
+ksoftirq-7     1d.s2   27us : _local_bh_enable (__do_softirq)
+ksoftirq-7     1d.s2   28us : sub_preempt_count (_local_bh_enable)
+ksoftirq-7     1.N.3   29us : sub_preempt_count (ksoftirqd)
+ksoftirq-7     1.N.2   30us : _cond_resched (ksoftirqd)
+ksoftirq-7     1.N.2   31us : __cond_resched (_cond_resched)
+ksoftirq-7     1.N.2   32us : add_preempt_count (__cond_resched)
+ksoftirq-7     1.N.2   33us : schedule (__cond_resched)
+ksoftirq-7     1.N.2   33us : add_preempt_count (schedule)
+ksoftirq-7     1.N.3   34us : hrtick_clear (schedule)
+ksoftirq-7     1dN.3   35us : _spin_lock (schedule)
+ksoftirq-7     1dN.3   36us : add_preempt_count (_spin_lock)
+ksoftirq-7     1d..4   37us : put_prev_task_fair (schedule)
+ksoftirq-7     1d..4   38us : update_curr (put_prev_task_fair)
+[...]
+ksoftirq-7     1d..5   47us : _spin_trylock (tracing_record_cmdline)
+ksoftirq-7     1d..5   48us : add_preempt_count (_spin_trylock)
+ksoftirq-7     1d..6   49us : _spin_unlock (tracing_record_cmdline)
+ksoftirq-7     1d..6   49us : sub_preempt_count (_spin_unlock)
+ksoftirq-7     1d..4   50us : schedule (__cond_resched)
+
+The interrupt went off while running ksoftirqd. This task runs at
+SCHED_OTHER. Why didn't we see the 'N' set early? This may be
+a harmless bug with x86_32 and 4K stacks. The need_resched() function
+that tests if we need to reschedule looks on the actual stack,
+whereas the setting of the NEED_RESCHED bit happens on the
+task's stack. Because we are in a hard interrupt, the test
+is done on the interrupt stack, where the bit is not set. We don't
+see the 'N' until we switch back to the task's stack.
+
+ftrace
+------
+
+ftrace is not only the name of the tracing infrastructure, but it
+is also a name of one of the tracers. The tracer is the function
+tracer. Enabling the function tracer can be done from the
+debug file system. Make sure the ftrace_enabled is set otherwise
+this tracer is a nop.
+
+ # sysctl kernel.ftrace_enabled=1
+ # echo ftrace > /debug/tracing/current_tracer
+ # echo 1 > /debug/tracing/tracing_enabled
+ # usleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+# tracer: ftrace
+#
+#           TASK-PID   CPU#    TIMESTAMP  FUNCTION
+#              | |      |          |         |
+            bash-4003  [00]   123.638713: finish_task_switch <-schedule
+            bash-4003  [00]   123.638714: _spin_unlock_irq <-finish_task_switch
+            bash-4003  [00]   123.638714: sub_preempt_count <-_spin_unlock_irq
+            bash-4003  [00]   123.638715: hrtick_set <-schedule
+            bash-4003  [00]   123.638715: _spin_lock_irqsave <-hrtick_set
+            bash-4003  [00]   123.638716: add_preempt_count <-_spin_lock_irqsave
+            bash-4003  [00]   123.638716: _spin_unlock_irqrestore <-hrtick_set
+            bash-4003  [00]   123.638717: sub_preempt_count <-_spin_unlock_irqrestore
+            bash-4003  [00]   123.638717: hrtick_clear <-hrtick_set
+            bash-4003  [00]   123.638718: sub_preempt_count <-schedule
+            bash-4003  [00]   123.638718: sub_preempt_count <-preempt_schedule
+            bash-4003  [00]   123.638719: wait_for_completion <-__stop_machine_run
+            bash-4003  [00]   123.638719: wait_for_common <-wait_for_completion
+            bash-4003  [00]   123.638720: _spin_lock_irq <-wait_for_common
+            bash-4003  [00]   123.638720: add_preempt_count <-_spin_lock_irq
+[...]
+
+
+Note: It is sometimes better to enable or disable tracing directly from
+a program, because the buffer may be overflowed by the echo commands
+before you get to the point you want to trace. It is also easier to
+stop the tracing at the point that you hit the part that you are
+interested in. Since the ftrace buffer is a ring buffer with the
+oldest data being overwritten, usually it is sufficient to start the
+tracer with an echo command but have your code stop it. Something
+like the following is usually appropriate for this.
+
+int trace_fd;
+[...]
+int main(int argc, char *argv[]) {
+	[...]
+	trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY);
+	[...]
+	if (condition_hit()) {
+		write(trace_fd, "0", 1);
+	}
+	[...]
+}
+
+
+dynamic ftrace
+--------------
+
+If CONFIG_DYNAMIC_FTRACE is set, then the system will run with
+virtually no overhead when function tracing is disabled. The way
+this works is the mcount function call (placed at the start of
+every kernel function, produced by the -pg switch in gcc), starts
+off pointing to a simple return.
+
+When dynamic ftrace is initialized, it calls kstop_machine to make it
+act like a uniprocessor so that it can freely modify code without
+worrying about other processors executing that same code.  At
+initialization, the mcount calls are changed to call a "record_ip"
+function.  After this, the first time a kernel function is called,
+it has the calling address saved in a hash table.
+
+Later on the ftraced kernel thread is awoken and will again call
+kstop_machine if new functions have been recorded. The ftraced thread
+will change all calls to mcount to "nop".  Just calling mcount
+and having mcount return has shown a 10% overhead. By converting
+it to a nop, there is no recordable overhead to the system.
+
+One special side-effect to the recording of the functions being
+traced, is that we can now selectively choose which functions we
+want to trace and which ones we want the mcount calls to remain as
+nops.
+
+Two files that pertain to the enabling and disabling of recorded
+functions are:
+
+  set_ftrace_filter
+
+and
+
+  set_ftrace_notrace
+
+A list of available functions that you can add to these files is listed
+in:
+
+   available_filter_functions
+
+ # cat /debug/tracing/available_filter_functions
+put_prev_task_idle
+kmem_cache_create
+pick_next_task_rt
+get_online_cpus
+pick_next_task_fair
+mutex_lock
+[...]
+
+If I'm only interested in sys_nanosleep and hrtimer_interrupt:
+
+ # echo sys_nanosleep hrtimer_interrupt \
+		> /debug/tracing/set_ftrace_filter
+ # echo ftrace > /debug/tracing/current_tracer
+ # echo 1 > /debug/tracing/tracing_enabled
+ # usleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+# tracer: ftrace
+#
+#           TASK-PID   CPU#    TIMESTAMP  FUNCTION
+#              | |      |          |         |
+          usleep-4134  [00]  1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt
+          usleep-4134  [00]  1317.070111: sys_nanosleep <-syscall_call
+          <idle>-0     [00]  1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt
+
+To see what functions are being traced, you can cat the file:
+
+ # cat /debug/tracing/set_ftrace_filter
+hrtimer_interrupt
+sys_nanosleep
+
+
+Perhaps this isn't enough. The filters also allow simple wild cards.
+Only the following are currently available:
+
+  <match>*  - will match functions that begin with <match>
+  *<match>  - will match functions that end with <match>
+  *<match>* - will match functions that have <match> in it
+
+That's all the wild cards that are allowed.
+
+  <match>*<match> will not work.
+
+ # echo hrtimer_* > /debug/tracing/set_ftrace_filter
+
+Produces:
+
+# tracer: ftrace
+#
+#           TASK-PID   CPU#    TIMESTAMP  FUNCTION
+#              | |      |          |         |
+            bash-4003  [00]  1480.611794: hrtimer_init <-copy_process
+            bash-4003  [00]  1480.611941: hrtimer_start <-hrtick_set
+            bash-4003  [00]  1480.611956: hrtimer_cancel <-hrtick_clear
+            bash-4003  [00]  1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel
+          <idle>-0     [00]  1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt
+          <idle>-0     [00]  1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt
+          <idle>-0     [00]  1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt
+          <idle>-0     [00]  1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt
+          <idle>-0     [00]  1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt
+
+
+Notice that we lost the sys_nanosleep.
+
+ # cat /debug/tracing/set_ftrace_filter
+hrtimer_run_queues
+hrtimer_run_pending
+hrtimer_init
+hrtimer_cancel
+hrtimer_try_to_cancel
+hrtimer_forward
+hrtimer_start
+hrtimer_reprogram
+hrtimer_force_reprogram
+hrtimer_get_next_event
+hrtimer_interrupt
+hrtimer_nanosleep
+hrtimer_wakeup
+hrtimer_get_remaining
+hrtimer_get_res
+hrtimer_init_sleeper
+
+
+This is because the '>' and '>>' act just like they do in bash.
+To rewrite the filters, use '>'
+To append to the filters, use '>>'
+
+To clear out a filter so that all functions will be recorded again:
+
+ # echo > /debug/tracing/set_ftrace_filter
+ # cat /debug/tracing/set_ftrace_filter
+ #
+
+Again, now we want to append.
+
+ # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
+ # cat /debug/tracing/set_ftrace_filter
+sys_nanosleep
+ # echo hrtimer_* >> /debug/tracing/set_ftrace_filter
+ # cat /debug/tracing/set_ftrace_filter
+hrtimer_run_queues
+hrtimer_run_pending
+hrtimer_init
+hrtimer_cancel
+hrtimer_try_to_cancel
+hrtimer_forward
+hrtimer_start
+hrtimer_reprogram
+hrtimer_force_reprogram
+hrtimer_get_next_event
+hrtimer_interrupt
+sys_nanosleep
+hrtimer_nanosleep
+hrtimer_wakeup
+hrtimer_get_remaining
+hrtimer_get_res
+hrtimer_init_sleeper
+
+
+The set_ftrace_notrace prevents those functions from being traced.
+
+ # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace
+
+Produces:
+
+# tracer: ftrace
+#
+#           TASK-PID   CPU#    TIMESTAMP  FUNCTION
+#              | |      |          |         |
+            bash-4043  [01]   115.281644: finish_task_switch <-schedule
+            bash-4043  [01]   115.281645: hrtick_set <-schedule
+            bash-4043  [01]   115.281645: hrtick_clear <-hrtick_set
+            bash-4043  [01]   115.281646: wait_for_completion <-__stop_machine_run
+            bash-4043  [01]   115.281647: wait_for_common <-wait_for_completion
+            bash-4043  [01]   115.281647: kthread_stop <-stop_machine_run
+            bash-4043  [01]   115.281648: init_waitqueue_head <-kthread_stop
+            bash-4043  [01]   115.281648: wake_up_process <-kthread_stop
+            bash-4043  [01]   115.281649: try_to_wake_up <-wake_up_process
+
+We can see that there's no more lock or preempt tracing.
+
+ftraced
+-------
+
+As mentioned above, when dynamic ftrace is configured in, a kernel
+thread wakes up once a second and checks to see if there are mcount
+calls that need to be converted into nops. If there are none, then
+it simply goes back to sleep. But if there are, it will call
+kstop_machine to convert the calls to nops.
+
+There may be a case that you do not want this added latency.
+Perhaps you are doing some audio recording and this activity might
+cause skips in the playback. There is an interface to disable
+and enable the ftraced kernel thread.
+
+ # echo 0 > /debug/tracing/ftraced_enabled
+
+This will disable the calling of the kstop_machine to update the
+mcount calls to nops. Remember that there's a large overhead
+to calling mcount. Without this kernel thread, that overhead will
+exist.
+
+Any write to the ftraced_enabled file will cause the kstop_machine
+to run if there are recorded calls to mcount. This means that a
+user can manually perform the updates when they want to by simply
+echoing a '0' into the ftraced_enabled file.
+
+The updates are also done at the beginning of enabling a tracer
+that uses ftrace function recording.
+
+
+trace_pipe
+----------
+
+The trace_pipe outputs the same as trace, but the effect on the
+tracing is different. Every read from trace_pipe is consumed.
+This means that subsequent reads will be different. The trace
+is live.
+
+ # echo ftrace > /debug/tracing/current_tracer
+ # cat /debug/tracing/trace_pipe > /tmp/trace.out &
+[1] 4153
+ # echo 1 > /debug/tracing/tracing_enabled
+ # usleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+# tracer: ftrace
+#
+#           TASK-PID   CPU#    TIMESTAMP  FUNCTION
+#              | |      |          |         |
+
+ #
+ # cat /tmp/trace.out
+            bash-4043  [00] 41.267106: finish_task_switch <-schedule
+            bash-4043  [00] 41.267106: hrtick_set <-schedule
+            bash-4043  [00] 41.267107: hrtick_clear <-hrtick_set
+            bash-4043  [00] 41.267108: wait_for_completion <-__stop_machine_run
+            bash-4043  [00] 41.267108: wait_for_common <-wait_for_completion
+            bash-4043  [00] 41.267109: kthread_stop <-stop_machine_run
+            bash-4043  [00] 41.267109: init_waitqueue_head <-kthread_stop
+            bash-4043  [00] 41.267110: wake_up_process <-kthread_stop
+            bash-4043  [00] 41.267110: try_to_wake_up <-wake_up_process
+            bash-4043  [00] 41.267111: select_task_rq_rt <-try_to_wake_up
+
+
+Note, reading the trace_pipe will block until more input is added.
+By changing the tracer, trace_pipe will issue an EOF. We needed
+to set the ftrace tracer _before_ cating the trace_pipe file.
+
+
+trace entries
+-------------
+
+Having too much or not enough data can be troublesome in diagnosing
+some issue in the kernel. The file trace_entries is used to modify
+the size of the internal trace buffers. The number listed
+is the number of entries that can be recorded per CPU. To know
+the full size, multiply the number of possible CPUs by the
+number of entries.
+
+ # cat /debug/tracing/trace_entries
+65620
+
+Note, to modify this you must have tracing fully disabled. To do that,
+echo "none" into the current_tracer.
+
+ # echo none > /debug/tracing/current_tracer
+ # echo 100000 > /debug/tracing/trace_entries
+ # cat /debug/tracing/trace_entries
+100045
+
+
+Notice that we echoed in 100,000 but the size is 100,045. The entries
+are held by individual pages. It allocates the number of pages it takes
+to fulfill the request. If more entries may fit on the last page
+it will add them.
+
+ # echo 1 > /debug/tracing/trace_entries
+ # cat /debug/tracing/trace_entries
+85
+
+This shows us that 85 entries can fit on a single page.
+
+The number of pages that will be allocated is a percentage of available
+memory. Allocating too much will produce an error.
+
+ # echo 1000000000000 > /debug/tracing/trace_entries
+-bash: echo: write error: Cannot allocate memory
+ # cat /debug/tracing/trace_entries
+85
+
diff --git a/Makefile b/Makefile
index 6aff5f47c21d..6315424a00b9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 26
-EXTRAVERSION = -rc8
+EXTRAVERSION = -rc9
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 76935e320214..27a5b466c85c 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -26,7 +26,7 @@
 static unsigned long icache_size, dcache_size;		/* Size in bytes */
 static unsigned long icache_lsize, dcache_lsize;	/* Size in bytes */
 
-unsigned long __init r3k_cache_size(unsigned long ca_flags)
+unsigned long __cpuinit r3k_cache_size(unsigned long ca_flags)
 {
 	unsigned long flags, status, dummy, size;
 	volatile unsigned long *p;
@@ -61,7 +61,7 @@ unsigned long __init r3k_cache_size(unsigned long ca_flags)
 	return size * sizeof(*p);
 }
 
-unsigned long __init r3k_cache_lsize(unsigned long ca_flags)
+unsigned long __cpuinit r3k_cache_lsize(unsigned long ca_flags)
 {
 	unsigned long flags, status, lsize, i;
 	volatile unsigned long *p;
@@ -90,7 +90,7 @@ unsigned long __init r3k_cache_lsize(unsigned long ca_flags)
 	return lsize * sizeof(*p);
 }
 
-static void __init r3k_probe_cache(void)
+static void __cpuinit r3k_probe_cache(void)
 {
 	dcache_size = r3k_cache_size(ST0_ISC);
 	if (dcache_size)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 1edf0cbbeede..1417c6494858 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -235,13 +235,12 @@ static void __cpuinit set_prefetch_parameters(void)
 	}
 	/*
 	 * Too much unrolling will overflow the available space in
-	 * clear_space_array / copy_page_array. 8 words sounds generous,
-	 * but a R4000 with 128 byte L2 line length can exceed even that.
+	 * clear_space_array / copy_page_array.
 	 */
-	half_clear_loop_size = min(8 * clear_word_size,
+	half_clear_loop_size = min(16 * clear_word_size,
 				   max(cache_line_size >> 1,
 				       4 * clear_word_size));
-	half_copy_loop_size = min(8 * copy_word_size,
+	half_copy_loop_size = min(16 * copy_word_size,
 				  max(cache_line_size >> 1,
 				      4 * copy_word_size));
 }
@@ -263,21 +262,23 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off)
 	if (pref_bias_clear_store) {
 		uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
 			    A0);
-	} else if (cpu_has_cache_cdex_s) {
-		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
-	} else if (cpu_has_cache_cdex_p) {
-		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-		}
+	} else if (cache_line_size == (half_clear_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
 
-		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-			uasm_i_lw(buf, ZERO, ZERO, AT);
+			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
 
-		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
-	}
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+		}
+		}
 }
 
 void __cpuinit build_clear_page(void)
@@ -403,20 +404,22 @@ static inline void build_copy_store_pref(u32 **buf, int off)
 	if (pref_bias_copy_store) {
 		uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
 			    A0);
-	} else if (cpu_has_cache_cdex_s) {
-		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
-	} else if (cpu_has_cache_cdex_p) {
-		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-		}
+	} else if (cache_line_size == (half_copy_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
 
-		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-			uasm_i_lw(buf, ZERO, ZERO, AT);
+			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
 
-		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+		}
 	}
 }
 
diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c
index fc227f3b1199..e3abfb2d7e86 100644
--- a/arch/mips/mm/sc-rm7k.c
+++ b/arch/mips/mm/sc-rm7k.c
@@ -86,7 +86,7 @@ static void rm7k_sc_inv(unsigned long addr, unsigned long size)
 /*
  * This function is executed in uncached address space.
  */
-static __init void __rm7k_sc_enable(void)
+static __cpuinit void __rm7k_sc_enable(void)
 {
 	int i;
 
@@ -107,7 +107,7 @@ static __init void __rm7k_sc_enable(void)
 	}
 }
 
-static __init void rm7k_sc_enable(void)
+static __cpuinit void rm7k_sc_enable(void)
 {
 	if (read_c0_config() & RM7K_CONF_SE)
 		return;
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index cf37f5ca4b71..4d96e1db55ee 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -33,13 +33,14 @@ static struct legacy_serial_info {
 	phys_addr_t			taddr;
 } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
 
-static struct __initdata of_device_id parents[] = {
+static struct __initdata of_device_id legacy_serial_parents[] = {
 	{.type = "soc",},
 	{.type = "tsi-bridge",},
 	{.type = "opb", },
 	{.compatible = "ibm,opb",},
 	{.compatible = "simple-bus",},
 	{.compatible = "wrs,epld-localbus",},
+	{},
 };
 
 static unsigned int legacy_serial_count;
@@ -327,7 +328,7 @@ void __init find_legacy_serial_ports(void)
 		struct device_node *parent = of_get_parent(np);
 		if (!parent)
 			continue;
-		if (of_match_node(parents, parent) != NULL) {
+		if (of_match_node(legacy_serial_parents, parent) != NULL) {
 			index = add_legacy_soc_port(np, np);
 			if (index >= 0 && np == stdout)
 				legacy_serial_console = index;
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index e79ad8afda07..3f37a6e62771 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -76,6 +76,8 @@ struct of_device* of_platform_device_create(struct device_node *np,
 		return NULL;
 
 	dev->dma_mask = 0xffffffffUL;
+	dev->dev.coherent_dma_mask = DMA_32BIT_MASK;
+
 	dev->dev.bus = &of_platform_bus_type;
 
 	/* We do not fill the DMA ops for platform devices by default.
diff --git a/arch/um/Makefile b/arch/um/Makefile
index dbeab15e7bb7..ca40397017b9 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -77,7 +77,6 @@ include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \
 			 -Dmktime=kernel_mktime $(ARCH_KERNEL_DEFINES))
 KBUILD_CFLAGS += $(KERNEL_DEFINES)
-KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
 
 PHONY += linux
 
diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
index 561e373bd850..302cbe504543 100644
--- a/arch/um/Makefile-i386
+++ b/arch/um/Makefile-i386
@@ -32,4 +32,11 @@ cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
 # an unresolved reference.
 cflags-y += -ffreestanding
 
+# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
+# a lot more stack due to the lack of sharing of stack slots.  Also, gcc
+# 4.3.0 needs -funit-at-a-time for extern inline functions.
+KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
+			echo $(call cc-option,-fno-unit-at-a-time); \
+			else echo $(call cc-option,-funit-at-a-time); fi ;)
+
 KBUILD_CFLAGS += $(cflags-y)
diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64
index 8ed362f93582..a9cd7e77a7ab 100644
--- a/arch/um/Makefile-x86_64
+++ b/arch/um/Makefile-x86_64
@@ -21,3 +21,6 @@ HEADER_ARCH := x86
 
 LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
 LINK-y += -m64
+
+# Do unit-at-a-time unconditionally on x86_64, following the host
+KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore
index 4ea38a39aed4..08f4fd731469 100644
--- a/arch/x86/kernel/.gitignore
+++ b/arch/x86/kernel/.gitignore
@@ -1,2 +1,3 @@
 vsyscall.lds
 vsyscall_32.lds
+vmlinux.lds
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 940185ecaeda..6e64aaf00d1d 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -328,18 +328,18 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
 #endif
 	{
 		.callback = set_bf_sort,
-		.ident = "HP ProLiant DL360",
+		.ident = "HP ProLiant DL385 G2",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "HP"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL360"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"),
 		},
 	},
 	{
 		.callback = set_bf_sort,
-		.ident = "HP ProLiant DL380",
+		.ident = "HP ProLiant DL585 G2",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "HP"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL380"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"),
 		},
 	},
 	{}
diff --git a/crypto/chainiv.c b/crypto/chainiv.c
index 6da3f577e4db..9affadee3287 100644
--- a/crypto/chainiv.c
+++ b/crypto/chainiv.c
@@ -117,6 +117,7 @@ static int chainiv_init(struct crypto_tfm *tfm)
 static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx)
 {
 	int queued;
+	int err = ctx->err;
 
 	if (!ctx->queue.qlen) {
 		smp_mb__before_clear_bit();
@@ -131,7 +132,7 @@ static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx)
 	BUG_ON(!queued);
 
 out:
-	return ctx->err;
+	return err;
 }
 
 static int async_chainiv_postpone_request(struct skcipher_givcrypt_request *req)
@@ -227,6 +228,7 @@ static void async_chainiv_do_postponed(struct work_struct *work)
 						     postponed);
 	struct skcipher_givcrypt_request *req;
 	struct ablkcipher_request *subreq;
+	int err;
 
 	/* Only handle one request at a time to avoid hogging keventd. */
 	spin_lock_bh(&ctx->lock);
@@ -241,7 +243,11 @@ static void async_chainiv_do_postponed(struct work_struct *work)
 	subreq = skcipher_givcrypt_reqctx(req);
 	subreq->base.flags |= CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	async_chainiv_givencrypt_tail(req);
+	err = async_chainiv_givencrypt_tail(req);
+
+	local_bh_disable();
+	skcipher_givcrypt_complete(req, err);
+	local_bh_enable();
 }
 
 static int async_chainiv_init(struct crypto_tfm *tfm)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 6beabc5abd07..e47f6e02133c 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -586,12 +586,6 @@ static void test_cipher(char *algo, int enc,
 	j = 0;
 	for (i = 0; i < tcount; i++) {
 
-		data = kzalloc(template[i].ilen, GFP_KERNEL);
-		if (!data)
-			continue;
-
-		memcpy(data, template[i].input, template[i].ilen);
-
 		if (template[i].iv)
 			memcpy(iv, template[i].iv, MAX_IVLEN);
 		else
@@ -613,10 +607,8 @@ static void test_cipher(char *algo, int enc,
 				printk("setkey() failed flags=%x\n",
 						crypto_ablkcipher_get_flags(tfm));
 
-				if (!template[i].fail) {
-					kfree(data);
+				if (!template[i].fail)
 					goto out;
-				}
 			}
 
 			temp = 0;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 061817a3a0e5..5e6468a7ca4b 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1777,7 +1777,7 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance)
 	struct ahci_host_priv *hpriv;
 	unsigned int i, handled = 0;
 	void __iomem *mmio;
-	u32 irq_stat;
+	u32 irq_stat, irq_masked;
 
 	VPRINTK("ENTER\n");
 
@@ -1786,16 +1786,17 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance)
 
 	/* sigh.  0xffffffff is a valid return from h/w */
 	irq_stat = readl(mmio + HOST_IRQ_STAT);
-	irq_stat &= hpriv->port_map;
 	if (!irq_stat)
 		return IRQ_NONE;
 
+	irq_masked = irq_stat & hpriv->port_map;
+
 	spin_lock(&host->lock);
 
 	for (i = 0; i < host->n_ports; i++) {
 		struct ata_port *ap;
 
-		if (!(irq_stat & (1 << i)))
+		if (!(irq_masked & (1 << i)))
 			continue;
 
 		ap = host->ports[i];
@@ -1812,6 +1813,15 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance)
 		handled = 1;
 	}
 
+	/* HOST_IRQ_STAT behaves as level triggered latch meaning that
+	 * it should be cleared after all the port events are cleared;
+	 * otherwise, it will raise a spurious interrupt after each
+	 * valid one.  Please read section 10.6.2 of ahci 1.1 for more
+	 * information.
+	 *
+	 * Also, use the unmasked value to clear interrupt as spurious
+	 * pending event on a dummy port might cause screaming IRQ.
+	 */
 	writel(irq_stat, mmio + HOST_IRQ_STAT);
 
 	spin_unlock(&host->lock);
diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c
index cc24803fadff..2f2b4f4cf229 100644
--- a/drivers/ide/arm/palm_bk3710.c
+++ b/drivers/ide/arm/palm_bk3710.c
@@ -76,7 +76,7 @@ struct palm_bk3710_udmatiming {
 
 #include "../ide-timing.h"
 
-static long ide_palm_clk;
+static unsigned ideclk_period; /* in nanoseconds */
 
 static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
 	{160, 240},		/* UDMA Mode 0 */
@@ -86,8 +86,6 @@ static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
 	{85,  60},		/* UDMA Mode 4 */
 };
 
-static struct clk *ideclkp;
-
 static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
 				    unsigned int mode)
 {
@@ -97,10 +95,10 @@ static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
 
 	/* DMA Data Setup */
 	t0 = DIV_ROUND_UP(palm_bk3710_udmatimings[mode].cycletime,
-			  ide_palm_clk) - 1;
-	tenv = DIV_ROUND_UP(20, ide_palm_clk) - 1;
+			  ideclk_period) - 1;
+	tenv = DIV_ROUND_UP(20, ideclk_period) - 1;
 	trp = DIV_ROUND_UP(palm_bk3710_udmatimings[mode].rptime,
-			   ide_palm_clk) - 1;
+			   ideclk_period) - 1;
 
 	/* udmatim Register */
 	val16 = readw(base + BK3710_UDMATIM) & (dev ? 0xFF0F : 0xFFF0);
@@ -141,8 +139,8 @@ static void palm_bk3710_setdmamode(void __iomem *base, unsigned int dev,
 	cycletime = max_t(int, t->cycle, min_cycle);
 
 	/* DMA Data Setup */
-	t0 = DIV_ROUND_UP(cycletime, ide_palm_clk);
-	td = DIV_ROUND_UP(t->active, ide_palm_clk);
+	t0 = DIV_ROUND_UP(cycletime, ideclk_period);
+	td = DIV_ROUND_UP(t->active, ideclk_period);
 	tkw = t0 - td - 1;
 	td -= 1;
 
@@ -168,9 +166,9 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate,
 	struct ide_timing *t;
 
 	/* PIO Data Setup */
-	t0 = DIV_ROUND_UP(cycletime, ide_palm_clk);
+	t0 = DIV_ROUND_UP(cycletime, ideclk_period);
 	t2 = DIV_ROUND_UP(ide_timing_find_mode(XFER_PIO_0 + mode)->active,
-			  ide_palm_clk);
+			  ideclk_period);
 
 	t2i = t0 - t2 - 1;
 	t2 -= 1;
@@ -192,8 +190,8 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate,
 
 	/* TASKFILE Setup */
 	t = ide_timing_find_mode(XFER_PIO_0 + mode);
-	t0 = DIV_ROUND_UP(t->cyc8b, ide_palm_clk);
-	t2 = DIV_ROUND_UP(t->act8b, ide_palm_clk);
+	t0 = DIV_ROUND_UP(t->cyc8b, ideclk_period);
+	t2 = DIV_ROUND_UP(t->act8b, ideclk_period);
 
 	t2i = t0 - t2 - 1;
 	t2 -= 1;
@@ -350,22 +348,22 @@ static const struct ide_port_info __devinitdata palm_bk3710_port_info = {
 
 static int __devinit palm_bk3710_probe(struct platform_device *pdev)
 {
-	struct clk *clkp;
+	struct clk *clk;
 	struct resource *mem, *irq;
 	ide_hwif_t *hwif;
-	unsigned long base;
+	unsigned long base, rate;
 	int i;
 	hw_regs_t hw;
 	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
-	clkp = clk_get(NULL, "IDECLK");
-	if (IS_ERR(clkp))
+	clk = clk_get(NULL, "IDECLK");
+	if (IS_ERR(clk))
 		return -ENODEV;
 
-	ideclkp = clkp;
-	clk_enable(ideclkp);
-	ide_palm_clk = clk_get_rate(ideclkp)/100000;
-	ide_palm_clk = (10000/ide_palm_clk) + 1;
+	clk_enable(clk);
+	rate = clk_get_rate(clk);
+	ideclk_period = 1000000000UL / rate;
+
 	/* Register the IDE interface with Linux ATA Interface */
 	memset(&hw, 0, sizeof(hw));
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 380fa0c8cc84..26e68b65b7cf 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -646,8 +646,6 @@ static int ide_register_port(ide_hwif_t *hwif)
 		goto out;
 	}
 
-	get_device(&hwif->gendev);
-
 	hwif->portdev = device_create_drvdata(ide_port_class, &hwif->gendev,
 					      MKDEV(0, 0), hwif, hwif->name);
 	if (IS_ERR(hwif->portdev)) {
@@ -1220,16 +1218,12 @@ static void drive_release_dev (struct device *dev)
 	complete(&drive->gendev_rel_comp);
 }
 
-#ifndef ide_default_irq
-#define ide_default_irq(irq) 0
-#endif
-
 static int hwif_init(ide_hwif_t *hwif)
 {
 	int old_irq;
 
 	if (!hwif->irq) {
-		hwif->irq = ide_default_irq(hwif->io_ports.data_addr);
+		hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
 		if (!hwif->irq) {
 			printk("%s: DISABLED, NO IRQ\n", hwif->name);
 			return 0;
@@ -1259,7 +1253,7 @@ static int hwif_init(ide_hwif_t *hwif)
 	 *	It failed to initialise. Find the default IRQ for 
 	 *	this port and try that.
 	 */
-	hwif->irq = ide_default_irq(hwif->io_ports.data_addr);
+	hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
 	if (!hwif->irq) {
 		printk("%s: Disabled unable to get IRQ %d.\n",
 			hwif->name, old_irq);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index c758dcb13b14..300431d080a9 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -315,13 +315,14 @@ void ide_unregister(ide_hwif_t *hwif)
 
 	BUG_ON(in_interrupt());
 	BUG_ON(irqs_disabled());
+
 	mutex_lock(&ide_cfg_mtx);
-	spin_lock_irq(&ide_lock);
-	if (!hwif->present)
-		goto abort;
-	__ide_port_unregister_devices(hwif);
-	hwif->present = 0;
 
+	spin_lock_irq(&ide_lock);
+	if (hwif->present) {
+		__ide_port_unregister_devices(hwif);
+		hwif->present = 0;
+	}
 	spin_unlock_irq(&ide_lock);
 
 	ide_proc_unregister_port(hwif);
@@ -351,16 +352,15 @@ void ide_unregister(ide_hwif_t *hwif)
 	blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
 	kfree(hwif->sg_table);
 	unregister_blkdev(hwif->major, hwif->name);
-	spin_lock_irq(&ide_lock);
 
 	if (hwif->dma_base)
 		ide_release_dma_engine(hwif);
 
+	spin_lock_irq(&ide_lock);
 	/* restore hwif data to pristine status */
 	ide_init_port_data(hwif, hwif->index);
-
-abort:
 	spin_unlock_irq(&ide_lock);
+
 	mutex_unlock(&ide_cfg_mtx);
 }
 
@@ -1094,13 +1094,6 @@ struct bus_type ide_bus_type = {
 
 EXPORT_SYMBOL_GPL(ide_bus_type);
 
-static void ide_port_class_release(struct device *portdev)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	put_device(&hwif->gendev);
-}
-
 int ide_vlb_clk;
 EXPORT_SYMBOL_GPL(ide_vlb_clk);
 
@@ -1305,7 +1298,6 @@ static int __init ide_init(void)
 		ret = PTR_ERR(ide_port_class);
 		goto out_port_class;
 	}
-	ide_port_class->dev_release = ide_port_class_release;
 
 	init_ide_data();
 
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 9053c8771e6e..2b71bdf74e73 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -184,8 +184,7 @@ static const struct ide_port_info it8213_chipsets[] __devinitdata = {
 
 static int __devinit it8213_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_setup_pci_device(dev, &it8213_chipsets[id->driver_data]);
-	return 0;
+	return ide_setup_pci_device(dev, &it8213_chipsets[id->driver_data]);
 }
 
 static const struct pci_device_id it8213_pci_tbl[] = {
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index fec4955f449b..a7a41bb82778 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -225,10 +225,6 @@ static int ns87415_dma_setup(ide_drive_t *drive)
 	return 1;
 }
 
-#ifndef ide_default_irq
-#define ide_default_irq(irq) 0
-#endif
-
 static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -288,7 +284,7 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif)
 	}
 
 	if (!using_inta)
-		hwif->irq = ide_default_irq(hwif->io_ports.data_addr);
+		hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
 	else if (!hwif->irq && hwif->mate && hwif->mate->irq)
 		hwif->irq = hwif->mate->irq;	/* share IRQ with mate */
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 8934178a23ee..95f82cfb6c54 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1096,7 +1096,9 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, ch
 	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
 
 	PDBG("%s dev 0x%p\n", __func__, dev);
+	rtnl_lock();
 	lldev->ethtool_ops->get_drvinfo(lldev, &info);
+	rtnl_unlock();
 	return sprintf(buf, "%s\n", info.fw_version);
 }
 
@@ -1109,7 +1111,9 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
 	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
 
 	PDBG("%s dev 0x%p\n", __func__, dev);
+	rtnl_lock();
 	lldev->ethtool_ops->get_drvinfo(lldev, &info);
+	rtnl_unlock();
 	return sprintf(buf, "%s\n", info.driver);
 }
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 54c8ee28fcc4..3b27df52456b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2017,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
 			 */
 			s->uptodate++;
 			return 0; /* uptodate + compute == disks */
-		} else if ((s->uptodate < disks - 1) &&
-			test_bit(R5_Insync, &dev->flags)) {
-			/* Note: we hold off compute operations while checks are
-			 * in flight, but we still prefer 'compute' over 'read'
-			 * hence we only read if (uptodate < * disks-1)
-			 */
+		} else if (test_bit(R5_Insync, &dev->flags)) {
 			set_bit(R5_LOCKED, &dev->flags);
 			set_bit(R5_Wantread, &dev->flags);
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 65210fca37ed..d89475d36988 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -114,6 +114,7 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data)
 	unsigned int nob = data->blocks;
 	unsigned long long clks;
 	unsigned int timeout;
+	bool dalgn = 0;
 	u32 dcmd;
 	int i;
 
@@ -152,6 +153,9 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data)
 		host->sg_cpu[i].dcmd = dcmd | length;
 		if (length & 31 && !(data->flags & MMC_DATA_READ))
 			host->sg_cpu[i].dcmd |= DCMD_ENDIRQEN;
+		/* Not aligned to 8-byte boundary? */
+		if (sg_dma_address(&data->sg[i]) & 0x7)
+			dalgn = 1;
 		if (data->flags & MMC_DATA_READ) {
 			host->sg_cpu[i].dsadr = host->res->start + MMC_RXFIFO;
 			host->sg_cpu[i].dtadr = sg_dma_address(&data->sg[i]);
@@ -165,6 +169,15 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data)
 	host->sg_cpu[host->dma_len - 1].ddadr = DDADR_STOP;
 	wmb();
 
+	/*
+	 * The PXA27x DMA controller encounters overhead when working with
+	 * unaligned (to 8-byte boundaries) data, so set this channel's byte
+	 * alignment bit only for unaligned data; otherwise clear just that bit.
+	 */
+	if (dalgn)
+		DALGN |= (1 << host->dma);
+	else
+		DALGN &= ~(1 << host->dma);
 	DDADR(host->dma) = host->sg_dma;
 	DCSR(host->dma) = DCSR_RUN;
 }
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index fe872fbd671e..e01926b7b5b7 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -40,7 +40,7 @@
 #include <asm/io.h>
 
 #define DRV_NAME	"ehea"
-#define DRV_VERSION	"EHEA_0091"
+#define DRV_VERSION	"EHEA_0092"
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
@@ -452,7 +452,7 @@ struct ehea_bcmc_reg_entry {
 struct ehea_bcmc_reg_array {
 	struct ehea_bcmc_reg_entry *arr;
 	int num_entries;
-	struct mutex lock;
+	spinlock_t lock;
 };
 
 #define EHEA_PORT_UP 1
@@ -478,6 +478,7 @@ struct ehea_port {
 	int num_add_tx_qps;
 	int num_mcs;
 	int resets;
+	u64 flags;
 	u64 mac_addr;
 	u32 logical_port_id;
 	u32 port_speed;
@@ -501,7 +502,8 @@ struct port_res_cfg {
 };
 
 enum ehea_flag_bits {
-	__EHEA_STOP_XFER
+	__EHEA_STOP_XFER,
+	__EHEA_DISABLE_PORT_RESET
 };
 
 void ehea_set_ethtool_ops(struct net_device *netdev);
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 075fd547421e..0920b796bd78 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -118,6 +118,7 @@ static struct of_device_id ehea_device_table[] = {
 	},
 	{},
 };
+MODULE_DEVICE_TABLE(of, ehea_device_table);
 
 static struct of_platform_driver ehea_driver = {
 	.name = "ehea",
@@ -137,6 +138,12 @@ void ehea_dump(void *adr, int len, char *msg)
 	}
 }
 
+void ehea_schedule_port_reset(struct ehea_port *port)
+{
+	if (!test_bit(__EHEA_DISABLE_PORT_RESET, &port->flags))
+		schedule_work(&port->reset_task);
+}
+
 static void ehea_update_firmware_handles(void)
 {
 	struct ehea_fw_handle_entry *arr = NULL;
@@ -241,7 +248,7 @@ static void ehea_update_bcmc_registrations(void)
 		}
 
 	if (num_registrations) {
-		arr = kzalloc(num_registrations * sizeof(*arr), GFP_KERNEL);
+		arr = kzalloc(num_registrations * sizeof(*arr), GFP_ATOMIC);
 		if (!arr)
 			return;  /* Keep the existing array */
 	} else
@@ -301,7 +308,7 @@ static struct net_device_stats *ehea_get_stats(struct net_device *dev)
 
 	memset(stats, 0, sizeof(*stats));
 
-	cb2 = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	cb2 = kzalloc(PAGE_SIZE, GFP_ATOMIC);
 	if (!cb2) {
 		ehea_error("no mem for cb2");
 		goto out;
@@ -587,7 +594,7 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq,
 				   "Resetting port.", pr->qp->init_attr.qp_nr);
 			ehea_dump(cqe, sizeof(*cqe), "CQE");
 		}
-		schedule_work(&pr->port->reset_task);
+		ehea_schedule_port_reset(pr->port);
 		return 1;
 	}
 
@@ -616,7 +623,7 @@ static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
 	*tcph = tcp_hdr(skb);
 
 	/* check if ip header and tcp header are complete */
-	if (iph->tot_len < ip_len + tcp_hdrlen(skb))
+	if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
 		return -1;
 
 	*hdr_flags = LRO_IPV4 | LRO_TCP;
@@ -765,7 +772,7 @@ static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota)
 			ehea_error("Send Completion Error: Resetting port");
 			if (netif_msg_tx_err(pr->port))
 				ehea_dump(cqe, sizeof(*cqe), "Send CQE");
-			schedule_work(&pr->port->reset_task);
+			ehea_schedule_port_reset(pr->port);
 			break;
 		}
 
@@ -885,7 +892,7 @@ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param)
 		eqe = ehea_poll_eq(port->qp_eq);
 	}
 
-	schedule_work(&port->reset_task);
+	ehea_schedule_port_reset(port);
 
 	return IRQ_HANDLED;
 }
@@ -1763,7 +1770,7 @@ static int ehea_set_mac_addr(struct net_device *dev, void *sa)
 
 	memcpy(dev->dev_addr, mac_addr->sa_data, dev->addr_len);
 
-	mutex_lock(&ehea_bcmc_regs.lock);
+	spin_lock(&ehea_bcmc_regs.lock);
 
 	/* Deregister old MAC in pHYP */
 	if (port->state == EHEA_PORT_UP) {
@@ -1785,7 +1792,7 @@ static int ehea_set_mac_addr(struct net_device *dev, void *sa)
 
 out_upregs:
 	ehea_update_bcmc_registrations();
-	mutex_unlock(&ehea_bcmc_regs.lock);
+	spin_unlock(&ehea_bcmc_regs.lock);
 out_free:
 	kfree(cb0);
 out:
@@ -1947,7 +1954,7 @@ static void ehea_set_multicast_list(struct net_device *dev)
 	}
 	ehea_promiscuous(dev, 0);
 
-	mutex_lock(&ehea_bcmc_regs.lock);
+	spin_lock(&ehea_bcmc_regs.lock);
 
 	if (dev->flags & IFF_ALLMULTI) {
 		ehea_allmulti(dev, 1);
@@ -1978,7 +1985,7 @@ static void ehea_set_multicast_list(struct net_device *dev)
 	}
 out:
 	ehea_update_bcmc_registrations();
-	mutex_unlock(&ehea_bcmc_regs.lock);
+	spin_unlock(&ehea_bcmc_regs.lock);
 	return;
 }
 
@@ -2497,7 +2504,7 @@ static int ehea_up(struct net_device *dev)
 		}
 	}
 
-	mutex_lock(&ehea_bcmc_regs.lock);
+	spin_lock(&ehea_bcmc_regs.lock);
 
 	ret = ehea_broadcast_reg_helper(port, H_REG_BCMC);
 	if (ret) {
@@ -2520,7 +2527,7 @@ out:
 		ehea_info("Failed starting %s. ret=%i", dev->name, ret);
 
 	ehea_update_bcmc_registrations();
-	mutex_unlock(&ehea_bcmc_regs.lock);
+	spin_unlock(&ehea_bcmc_regs.lock);
 
 	ehea_update_firmware_handles();
 	mutex_unlock(&ehea_fw_handles.lock);
@@ -2575,7 +2582,7 @@ static int ehea_down(struct net_device *dev)
 
 	mutex_lock(&ehea_fw_handles.lock);
 
-	mutex_lock(&ehea_bcmc_regs.lock);
+	spin_lock(&ehea_bcmc_regs.lock);
 	ehea_drop_multicast_list(dev);
 	ehea_broadcast_reg_helper(port, H_DEREG_BCMC);
 
@@ -2584,7 +2591,7 @@ static int ehea_down(struct net_device *dev)
 	port->state = EHEA_PORT_DOWN;
 
 	ehea_update_bcmc_registrations();
-	mutex_unlock(&ehea_bcmc_regs.lock);
+	spin_unlock(&ehea_bcmc_regs.lock);
 
 	ret = ehea_clean_all_portres(port);
 	if (ret)
@@ -2605,13 +2612,14 @@ static int ehea_stop(struct net_device *dev)
 	if (netif_msg_ifdown(port))
 		ehea_info("disabling port %s", dev->name);
 
+	set_bit(__EHEA_DISABLE_PORT_RESET, &port->flags);
 	cancel_work_sync(&port->reset_task);
-
 	mutex_lock(&port->port_lock);
 	netif_stop_queue(dev);
 	port_napi_disable(port);
 	ret = ehea_down(dev);
 	mutex_unlock(&port->port_lock);
+	clear_bit(__EHEA_DISABLE_PORT_RESET, &port->flags);
 	return ret;
 }
 
@@ -2941,7 +2949,7 @@ static void ehea_tx_watchdog(struct net_device *dev)
 
 	if (netif_carrier_ok(dev) &&
 	    !test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))
-		schedule_work(&port->reset_task);
+		ehea_schedule_port_reset(port);
 }
 
 int ehea_sense_adapter_attr(struct ehea_adapter *adapter)
@@ -3590,7 +3598,7 @@ int __init ehea_module_init(void)
 	memset(&ehea_bcmc_regs, 0, sizeof(ehea_bcmc_regs));
 
 	mutex_init(&ehea_fw_handles.lock);
-	mutex_init(&ehea_bcmc_regs.lock);
+	spin_lock_init(&ehea_bcmc_regs.lock);
 
 	ret = check_module_parm();
 	if (ret)
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 2cb244763292..20d4fe96a81c 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -4194,12 +4194,23 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 
 	netif_carrier_off(dev);
 	if (netif_running(dev)) {
+		unsigned long flags;
+
 		nv_disable_irq(dev);
 		netif_tx_lock_bh(dev);
-		spin_lock(&np->lock);
+		/* with plain spinlock lockdep complains */
+		spin_lock_irqsave(&np->lock, flags);
 		/* stop engines */
+		/* FIXME:
+		 * this can take some time, and interrupts are disabled
+		 * due to spin_lock_irqsave, but let's hope no daemon
+		 * is going to change the settings very often...
+		 * Worst case:
+		 * NV_RXSTOP_DELAY1MAX + NV_TXSTOP_DELAY1MAX
+		 * + some minor delays, which is up to a second approximately
+		 */
 		nv_stop_rxtx(dev);
-		spin_unlock(&np->lock);
+		spin_unlock_irqrestore(&np->lock, flags);
 		netif_tx_unlock_bh(dev);
 	}
 
diff --git a/drivers/net/fs_enet/mac-fcc.c b/drivers/net/fs_enet/mac-fcc.c
index e36321152d50..8268b3535b30 100644
--- a/drivers/net/fs_enet/mac-fcc.c
+++ b/drivers/net/fs_enet/mac-fcc.c
@@ -463,6 +463,9 @@ static void restart(struct net_device *dev)
 	else
 		C32(fccp, fcc_fpsmr, FCC_PSMR_FDE | FCC_PSMR_LPB);
 
+	/* Restore multicast and promiscuous settings */
+	set_multicast_list(dev);
+
 	S32(fccp, fcc_gfmr, FCC_GFMR_ENR | FCC_GFMR_ENT);
 }
 
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index 5d2108c5ac7c..babc79ad490b 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -1636,6 +1636,12 @@ static int emac_poll_rx(void *param, int budget)
 			goto next;
 		}
 
+		if (len < ETH_HLEN) {
+			++dev->estats.rx_dropped_stack;
+			emac_recycle_rx_skb(dev, slot, len);
+			goto next;
+		}
+
 		if (len && len < EMAC_RX_COPY_THRESH) {
 			struct sk_buff *copy_skb =
 			    alloc_skb(len + EMAC_RX_SKB_HEADROOM + 2, GFP_ATOMIC);
@@ -2719,6 +2725,8 @@ static int __devinit emac_probe(struct of_device *ofdev,
 	/* Clean rings */
 	memset(dev->tx_desc, 0, NUM_TX_BUFF * sizeof(struct mal_descriptor));
 	memset(dev->rx_desc, 0, NUM_RX_BUFF * sizeof(struct mal_descriptor));
+	memset(dev->tx_skb, 0, NUM_TX_BUFF * sizeof(struct sk_buff *));
+	memset(dev->rx_skb, 0, NUM_RX_BUFF * sizeof(struct sk_buff *));
 
 	/* Attach to ZMII, if needed */
 	if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII) &&
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 3b2a6c598088..993d87c9296f 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -277,7 +277,7 @@ static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
 	*tcph = tcp_hdr(skb);
 
 	/* check if ip header and tcp header are complete */
-	if (iph->tot_len < ip_len + tcp_hdrlen(skb))
+	if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
 		return -1;
 
 	*hdr_flags = LRO_IPV4 | LRO_TCP;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 520bb0b1a9a2..6d35155c7145 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1008,6 +1008,7 @@ static int fr_rx(struct sk_buff *skb)
 		stats->rx_bytes += skb->len;
 		if (pvc->state.becn)
 			stats->rx_compressed++;
+		skb->dev = dev;
 		netif_rx(skb);
 		return NET_RX_SUCCESS;
 	} else {
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index 62a3d8f8563e..f5387a7a76c0 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -588,8 +588,12 @@ static void iwl3945_add_radiotap(struct iwl3945_priv *priv,
 
 	if (rate == -1)
 		iwl3945_rt->rt_rate = 0;
-	else
+	else {
+		if (stats->band == IEEE80211_BAND_5GHZ)
+			rate += IWL_FIRST_OFDM_RATE;
+
 		iwl3945_rt->rt_rate = iwl3945_rates[rate].ieee;
+	}
 
 	/* antenna number */
 	antenna = phy_flags_hw & RX_RES_PHY_FLAGS_ANTENNA_MSK;
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index bf19eb8aafd0..de330ae0ca95 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -3528,8 +3528,12 @@ static void iwl4965_add_radiotap(struct iwl_priv *priv,
 
 	if (rate == -1)
 		iwl4965_rt->rt_rate = 0;
-	else
+	else {
+		if (stats->band == IEEE80211_BAND_5GHZ)
+			rate += IWL_FIRST_OFDM_RATE;
+
 		iwl4965_rt->rt_rate = iwl4965_rates[rate].ieee;
+	}
 
 	/*
 	 * "antenna number"
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index b1b3c523185d..6027e1119c3f 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -6687,7 +6687,8 @@ static int iwl3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 
 	if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) {
 		IWL_DEBUG_MAC80211("leave - monitor\n");
-		return -1;
+		dev_kfree_skb_any(skb);
+		return 0;
 	}
 
 	IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index 5ed16ce78468..0bd55bb19739 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -6237,7 +6237,8 @@ static int iwl4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 
 	if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) {
 		IWL_DEBUG_MAC80211("leave - monitor\n");
-		return -1;
+		dev_kfree_skb_any(skb);
+		return 0;
 	}
 
 	IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
diff --git a/drivers/net/wireless/libertas/if_usb.c b/drivers/net/wireless/libertas/if_usb.c
index 8032df72aaab..36288b29abf7 100644
--- a/drivers/net/wireless/libertas/if_usb.c
+++ b/drivers/net/wireless/libertas/if_usb.c
@@ -925,6 +925,7 @@ static struct usb_driver if_usb_driver = {
 	.id_table = if_usb_table,
 	.suspend = if_usb_suspend,
 	.resume = if_usb_resume,
+	.reset_resume = if_usb_resume,
 };
 
 static int __init if_usb_init_module(void)
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 7158dbb6e4b4..42a436478b78 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -924,6 +924,15 @@ static int register_root_hub(struct usb_hcd *hcd)
 	return retval;
 }
 
+void usb_enable_root_hub_irq (struct usb_bus *bus)
+{
+	struct usb_hcd *hcd;
+
+	hcd = container_of (bus, struct usb_hcd, self);
+	if (hcd->driver->hub_irq_enable && hcd->state != HC_STATE_HALT)
+		hcd->driver->hub_irq_enable (hcd);
+}
+
 
 /*-------------------------------------------------------------------------*/
 
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index a0bf5df6cb6f..b9de1569b39e 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -210,6 +210,8 @@ struct hc_driver {
 	int	(*bus_suspend)(struct usb_hcd *);
 	int	(*bus_resume)(struct usb_hcd *);
 	int	(*start_port_reset)(struct usb_hcd *, unsigned port_num);
+	void	(*hub_irq_enable)(struct usb_hcd *);
+		/* Needed only if port-change IRQs are level-triggered */
 
 		/* force handover of high-speed port to full-speed companion */
 	void	(*relinquish_port)(struct usb_hcd *, int);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 512d2d57d41e..4cfe32a16c37 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2073,6 +2073,8 @@ int usb_port_resume(struct usb_device *udev)
 	}
 
 	clear_bit(port1, hub->busy_bits);
+	if (!hub->hdev->parent && !hub->busy_bits[0])
+		usb_enable_root_hub_irq(hub->hdev->bus);
 
 	if (status == 0)
 		status = finish_port_resume(udev);
@@ -3002,6 +3004,11 @@ static void hub_events(void)
 
 		hub->activating = 0;
 
+		/* If this is a root hub, tell the HCD it's okay to
+		 * re-enable port-change interrupts now. */
+		if (!hdev->parent && !hub->busy_bits[0])
+			usb_enable_root_hub_irq(hdev->bus);
+
 loop_autopm:
 		/* Allow autosuspend if we're not going to run again */
 		if (list_empty(&hub->event_list))
@@ -3227,6 +3234,8 @@ int usb_reset_device(struct usb_device *udev)
 			break;
 	}
 	clear_bit(port1, parent_hub->busy_bits);
+	if (!parent_hdev->parent && !parent_hub->busy_bits[0])
+		usb_enable_root_hub_irq(parent_hdev->bus);
 
 	if (ret < 0)
 		goto re_enumerate;
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index c96db1153dcf..e534f9de0f05 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -261,6 +261,7 @@ static const struct hc_driver ohci_at91_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index 1b9abdba920b..f90fe0c7373f 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -288,6 +288,7 @@ static const struct hc_driver ohci_au1xxx_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ep93xx.c b/drivers/usb/host/ohci-ep93xx.c
index 06aadfb0ec29..5adaf36e47d0 100644
--- a/drivers/usb/host/ohci-ep93xx.c
+++ b/drivers/usb/host/ohci-ep93xx.c
@@ -135,6 +135,7 @@ static struct hc_driver ohci_ep93xx_hc_driver = {
 	.get_frame_number	= ohci_get_frame,
 	.hub_status_data	= ohci_hub_status_data,
 	.hub_control		= ohci_hub_control,
+	.hub_irq_enable		= ohci_rhsc_enable,
 #ifdef CONFIG_PM
 	.bus_suspend		= ohci_bus_suspend,
 	.bus_resume		= ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c
index 79a78029f896..b56739221d11 100644
--- a/drivers/usb/host/ohci-hub.c
+++ b/drivers/usb/host/ohci-hub.c
@@ -36,6 +36,18 @@
 
 /*-------------------------------------------------------------------------*/
 
+/* hcd->hub_irq_enable(): turn the RHSC interrupt back on under ohci->lock */
+static void ohci_rhsc_enable (struct usb_hcd *hcd)
+{
+	struct ohci_hcd		*ohci = hcd_to_ohci (hcd);
+
+	spin_lock_irq(&ohci->lock);
+	if (!ohci->autostop)
+		del_timer(&hcd->rh_timer);	/* Prevent next poll */
+	ohci_writel(ohci, OHCI_INTR_RHSC, &ohci->regs->intrenable);
+	spin_unlock_irq(&ohci->lock);
+}
+
 #define OHCI_SCHED_ENABLES \
 	(OHCI_CTRL_CLE|OHCI_CTRL_BLE|OHCI_CTRL_PLE|OHCI_CTRL_IE)
 
@@ -362,28 +374,18 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
 		int any_connected)
 {
 	int	poll_rh = 1;
-	int	rhsc;
 
-	rhsc = ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC;
 	switch (ohci->hc_control & OHCI_CTRL_HCFS) {
 
 	case OHCI_USB_OPER:
-		/* If no status changes are pending, enable status-change
-		 * interrupts.
-		 */
-		if (!rhsc && !changed) {
-			rhsc = OHCI_INTR_RHSC;
-			ohci_writel(ohci, rhsc, &ohci->regs->intrenable);
-		}
-
-		/* Keep on polling until we know a device is connected
-		 * and RHSC is enabled, or until we autostop.
-		 */
+		/* keep on polling until we know a device is connected
+		 * and RHSC is enabled */
 		if (!ohci->autostop) {
 			if (any_connected ||
 					!device_may_wakeup(&ohci_to_hcd(ohci)
 						->self.root_hub->dev)) {
-				if (rhsc)
+				if (ohci_readl(ohci, &ohci->regs->intrenable) &
+						OHCI_INTR_RHSC)
 					poll_rh = 0;
 			} else {
 				ohci->autostop = 1;
@@ -396,13 +398,12 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
 				ohci->autostop = 0;
 				ohci->next_statechange = jiffies +
 						STATECHANGE_DELAY;
-			} else if (rhsc && time_after_eq(jiffies,
+			} else if (time_after_eq(jiffies,
 						ohci->next_statechange)
 					&& !ohci->ed_rm_list
 					&& !(ohci->hc_control &
 						OHCI_SCHED_ENABLES)) {
 				ohci_rh_suspend(ohci, 1);
-				poll_rh = 0;
 			}
 		}
 		break;
@@ -416,12 +417,6 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
 			else
 				usb_hcd_resume_root_hub(ohci_to_hcd(ohci));
 		} else {
-			if (!rhsc && (ohci->autostop ||
-					ohci_to_hcd(ohci)->self.root_hub->
-						do_remote_wakeup))
-				ohci_writel(ohci, OHCI_INTR_RHSC,
-						&ohci->regs->intrenable);
-
 			/* everything is idle, no need for polling */
 			poll_rh = 0;
 		}
@@ -443,16 +438,12 @@ static inline int ohci_rh_resume(struct ohci_hcd *ohci)
 static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
 		int any_connected)
 {
-	/* If RHSC is enabled, don't poll */
-	if (ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC)
-		return 0;
+	int	poll_rh = 1;
 
-	/* If no status changes are pending, enable status-change interrupts */
-	if (!changed) {
-		ohci_writel(ohci, OHCI_INTR_RHSC, &ohci->regs->intrenable);
-		return 0;
-	}
-	return 1;
+	/* keep on polling until RHSC is enabled */
+	if (ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC)
+		poll_rh = 0;
+	return poll_rh;
 }
 
 #endif	/* CONFIG_PM */
diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c
index 96d14fa1d833..13c12ed22252 100644
--- a/drivers/usb/host/ohci-lh7a404.c
+++ b/drivers/usb/host/ohci-lh7a404.c
@@ -193,6 +193,7 @@ static const struct hc_driver ohci_lh7a404_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index 6859fb5f1d6f..3a7c24c03671 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -466,6 +466,7 @@ static const struct hc_driver ohci_omap_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c
index 3bf175d95a23..4696cc912e16 100644
--- a/drivers/usb/host/ohci-pci.c
+++ b/drivers/usb/host/ohci-pci.c
@@ -327,6 +327,7 @@ static const struct hc_driver ohci_pci_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-pnx4008.c b/drivers/usb/host/ohci-pnx4008.c
index 664f07ee8732..28b458f20cc3 100644
--- a/drivers/usb/host/ohci-pnx4008.c
+++ b/drivers/usb/host/ohci-pnx4008.c
@@ -280,6 +280,7 @@ static const struct hc_driver ohci_pnx4008_hc_driver = {
 	 */
 	.hub_status_data = ohci_hub_status_data,
 	.hub_control = ohci_hub_control,
+	.hub_irq_enable = ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend = ohci_bus_suspend,
 	.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-pnx8550.c b/drivers/usb/host/ohci-pnx8550.c
index 28467e288a93..605d59cba28e 100644
--- a/drivers/usb/host/ohci-pnx8550.c
+++ b/drivers/usb/host/ohci-pnx8550.c
@@ -201,6 +201,7 @@ static const struct hc_driver ohci_pnx8550_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ppc-of.c b/drivers/usb/host/ohci-ppc-of.c
index 50e55db13636..a67252791223 100644
--- a/drivers/usb/host/ohci-ppc-of.c
+++ b/drivers/usb/host/ohci-ppc-of.c
@@ -72,6 +72,7 @@ static const struct hc_driver ohci_ppc_of_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ppc-soc.c b/drivers/usb/host/ohci-ppc-soc.c
index cd3398b675b2..523c30125577 100644
--- a/drivers/usb/host/ohci-ppc-soc.c
+++ b/drivers/usb/host/ohci-ppc-soc.c
@@ -172,6 +172,7 @@ static const struct hc_driver ohci_ppc_soc_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c
index bfdeb0d22d05..c1935ae537f8 100644
--- a/drivers/usb/host/ohci-ps3.c
+++ b/drivers/usb/host/ohci-ps3.c
@@ -68,6 +68,7 @@ static const struct hc_driver ps3_ohci_hc_driver = {
 	.get_frame_number	= ohci_get_frame,
 	.hub_status_data	= ohci_hub_status_data,
 	.hub_control		= ohci_hub_control,
+	.hub_irq_enable		= ohci_rhsc_enable,
 	.start_port_reset	= ohci_start_port_reset,
 #if defined(CONFIG_PM)
 	.bus_suspend 		= ohci_bus_suspend,
diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c
index 70b0d4b459e7..d4ee27d92be8 100644
--- a/drivers/usb/host/ohci-pxa27x.c
+++ b/drivers/usb/host/ohci-pxa27x.c
@@ -298,6 +298,7 @@ static const struct hc_driver ohci_pxa27x_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef  CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c
index a73d2ff322e2..ead4772f0f27 100644
--- a/drivers/usb/host/ohci-s3c2410.c
+++ b/drivers/usb/host/ohci-s3c2410.c
@@ -466,6 +466,7 @@ static const struct hc_driver ohci_s3c2410_hc_driver = {
 	 */
 	.hub_status_data =	ohci_s3c2410_hub_status_data,
 	.hub_control =		ohci_s3c2410_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c
index 99438c65981b..0f48f2d99226 100644
--- a/drivers/usb/host/ohci-sa1111.c
+++ b/drivers/usb/host/ohci-sa1111.c
@@ -231,6 +231,7 @@ static const struct hc_driver ohci_sa1111_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-sh.c b/drivers/usb/host/ohci-sh.c
index 60f03cc7ec4f..e7ee607278fe 100644
--- a/drivers/usb/host/ohci-sh.c
+++ b/drivers/usb/host/ohci-sh.c
@@ -68,6 +68,7 @@ static const struct hc_driver ohci_sh_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c
index e899a77dfb83..e610698c6b60 100644
--- a/drivers/usb/host/ohci-sm501.c
+++ b/drivers/usb/host/ohci-sm501.c
@@ -75,6 +75,7 @@ static const struct hc_driver ohci_sm501_hc_driver = {
 	 */
 	.hub_status_data =	ohci_hub_status_data,
 	.hub_control =		ohci_hub_control,
+	.hub_irq_enable =	ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend =		ohci_bus_suspend,
 	.bus_resume =		ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ssb.c b/drivers/usb/host/ohci-ssb.c
index c4265caec780..7275186db315 100644
--- a/drivers/usb/host/ohci-ssb.c
+++ b/drivers/usb/host/ohci-ssb.c
@@ -81,6 +81,7 @@ static const struct hc_driver ssb_ohci_hc_driver = {
 
 	.hub_status_data	= ohci_hub_status_data,
 	.hub_control		= ohci_hub_control,
+	.hub_irq_enable		= ohci_rhsc_enable,
 #ifdef	CONFIG_PM
 	.bus_suspend		= ohci_bus_suspend,
 	.bus_resume		= ohci_bus_resume,
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index f29307405bb3..9b6323f768b2 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -2934,6 +2934,16 @@ static int u132_start_port_reset(struct usb_hcd *hcd, unsigned port_num)
 		return 0;
 }
 
+static void u132_hub_irq_enable(struct usb_hcd *hcd)
+{
+	struct u132 *u132 = hcd_to_u132(hcd);
+	if (u132->going > 1) {
+		dev_err(&u132->platform_dev->dev, "device has been removed %d\n"
+			, u132->going);
+	} else if (u132->going > 0)
+		dev_err(&u132->platform_dev->dev, "device is being removed\n");
+}
+
 
 #ifdef CONFIG_PM
 static int u132_bus_suspend(struct usb_hcd *hcd)
@@ -2985,6 +2995,7 @@ static struct hc_driver u132_hc_driver = {
 	.bus_suspend = u132_bus_suspend,
 	.bus_resume = u132_bus_resume,
 	.start_port_reset = u132_start_port_reset,
+	.hub_irq_enable = u132_hub_irq_enable,
 };
 
 /*
diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
index 712dabc6269f..09d7e22c6fef 100644
--- a/drivers/video/fsl-diu-fb.c
+++ b/drivers/video/fsl-diu-fb.c
@@ -1324,7 +1324,7 @@ static int fsl_diu_suspend(struct of_device *ofdev, pm_message_t state)
 {
 	struct fsl_diu_data *machine_data;
 
-	machine_data = dev_get_drvdata(&dev->dev);
+	machine_data = dev_get_drvdata(&ofdev->dev);
 	disable_lcdc(machine_data->fsl_diu_info[0]);
 
 	return 0;
@@ -1334,7 +1334,7 @@ static int fsl_diu_resume(struct of_device *ofdev)
 {
 	struct fsl_diu_data *machine_data;
 
-	machine_data = dev_get_drvdata(&dev->dev);
+	machine_data = dev_get_drvdata(&ofdev->dev);
 	enable_lcdc(machine_data->fsl_diu_info[0]);
 
 	return 0;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 58d43daec084..982a2064fe4c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -204,7 +204,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	 * Note: assumes we have exclusive access to this mapping either
 	 *	 through inode->i_mutex or some other mechanism.
 	 */
-	if (page->index == 0 && invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1) < 0) {
+	if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
 		/* Should never happen */
 		nfs_zap_mapping(inode, inode->i_mapping);
 	}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index efc015c6128a..44f87caf3683 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -606,7 +606,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
 
 	res->last_used = 0;
 
+	spin_lock(&dlm->spinlock);
 	list_add_tail(&res->tracking, &dlm->tracking_list);
+	spin_unlock(&dlm->spinlock);
 
 	memset(res->lvb, 0, DLM_LVB_LEN);
 	memset(res->refmap, 0, sizeof(res->refmap));
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ab8ccc9d14ff..c492449f3b45 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -476,10 +476,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		return -ESRCH;
 	mm = get_task_mm(task);
 	if (mm) {
-		static struct mm_walk clear_refs_walk;
-		memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
-		clear_refs_walk.pmd_entry = clear_refs_pte_range;
-		clear_refs_walk.mm = mm;
+		struct mm_walk clear_refs_walk = {
+			.pmd_entry = clear_refs_pte_range,
+			.mm = mm,
+		};
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			clear_refs_walk.private = vma;
@@ -602,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	return err;
 }
 
-static struct mm_walk pagemap_walk = {
-	.pmd_entry = pagemap_pte_range,
-	.pte_hole = pagemap_pte_hole
-};
-
 /*
  * /proc/pid/pagemap - an array mapping virtual pages to pfns
  *
@@ -641,6 +636,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	struct pagemapread pm;
 	int pagecount;
 	int ret = -ESRCH;
+	struct mm_walk pagemap_walk = {};	/* zero unset members */
+	unsigned long src;
+	unsigned long svpfn;
+	unsigned long start_vaddr;
+	unsigned long end_vaddr;
 
 	if (!task)
 		goto out;
@@ -659,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (!mm)
 		goto out_task;
 
-	ret = -ENOMEM;
+
 	uaddr = (unsigned long)buf & PAGE_MASK;
 	uend = (unsigned long)(buf + count);
 	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
-	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+	ret = 0;
+	if (pagecount == 0)
+		goto out_mm;
+	pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+	ret = -ENOMEM;
 	if (!pages)
 		goto out_mm;
 
@@ -684,33 +688,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	pm.out = (u64 *)buf;
 	pm.end = (u64 *)(buf + count);
 
-	if (!ptrace_may_attach(task)) {
-		ret = -EIO;
-	} else {
-		unsigned long src = *ppos;
-		unsigned long svpfn = src / PM_ENTRY_BYTES;
-		unsigned long start_vaddr = svpfn << PAGE_SHIFT;
-		unsigned long end_vaddr = TASK_SIZE_OF(task);
-
-		/* watch out for wraparound */
-		if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
-			start_vaddr = end_vaddr;
-
-		/*
-		 * The odds are that this will stop walking way
-		 * before end_vaddr, because the length of the
-		 * user buffer is tracked in "pm", and the walk
-		 * will stop when we hit the end of the buffer.
-		 */
-		ret = walk_page_range(start_vaddr, end_vaddr,
-					&pagemap_walk);
-		if (ret == PM_END_OF_BUFFER)
-			ret = 0;
-		/* don't need mmap_sem for these, but this looks cleaner */
-		*ppos += (char *)pm.out - buf;
-		if (!ret)
-			ret = (char *)pm.out - buf;
-	}
+	pagemap_walk.pmd_entry = pagemap_pte_range;
+	pagemap_walk.pte_hole = pagemap_pte_hole;
+	pagemap_walk.mm = mm;
+	pagemap_walk.private = &pm;
+
+	src = *ppos;
+	svpfn = src / PM_ENTRY_BYTES;
+	start_vaddr = svpfn << PAGE_SHIFT;
+	end_vaddr = TASK_SIZE_OF(task);
+
+	/* watch out for wraparound */
+	if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+		start_vaddr = end_vaddr;
+
+	/*
+	 * The odds are that this will stop walking way
+	 * before end_vaddr, because the length of the
+	 * user buffer is tracked in "pm", and the walk
+	 * will stop when we hit the end of the buffer.
+	 */
+	ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
+	if (ret == PM_END_OF_BUFFER)
+		ret = 0;
+	/* don't need mmap_sem for these, but this looks cleaner */
+	*ppos += (char *)pm.out - buf;
+	if (!ret)
+		ret = (char *)pm.out - buf;
 
 out_pages:
 	for (; pagecount; pagecount--) {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 57917932212e..192269698a8a 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -45,6 +45,8 @@ void reiserfs_delete_inode(struct inode *inode)
 			goto out;
 		reiserfs_update_inode_transaction(inode);
 
+		reiserfs_discard_prealloc(&th, inode);
+
 		err = reiserfs_delete_object(&th, inode);
 
 		/* Do quota update inside a transaction for journaled quotas. We must do that
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index 649c6c3b87b3..be32ff02f4a0 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -49,12 +49,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
 	return pte_wrprotect(pte);
 }
 
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index cc6a43ba41d0..7686569a0bef 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -314,6 +314,16 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	old = pte_update(mm, addr, ptep, _PAGE_RW, 0);
 }
 
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	unsigned long old;
+
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+	old = pte_update(mm, addr, ptep, _PAGE_RW, 1);
+}
+
 /*
  * We currently remove entries from the hashtable regardless of whether
  * the entry was young or dirty. The generic routines only flush if the
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index c7f4f8e3e297..bd0ea191dfa9 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -223,6 +223,9 @@ extern char empty_zero_page[PAGE_SIZE];
 #define _PAGE_SPECIAL	0x004		/* SW associated with special page */
 #define __HAVE_ARCH_PTE_SPECIAL
 
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL)
+
 /* Six different types of pages. */
 #define _PAGE_TYPE_EMPTY	0x400
 #define _PAGE_TYPE_NONE		0x401
@@ -681,7 +684,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
  */
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pte_val(pte) &= PAGE_MASK;
+	pte_val(pte) &= _PAGE_CHG_MASK;
 	pte_val(pte) |= pgprot_val(newprot);
 	return pte;
 }
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index bfd9900742bf..76f392146daa 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -71,7 +71,8 @@ static inline long kvm_hypercall0(unsigned int nr)
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr));
+		     : "a"(nr)
+		     : "memory");
 	return ret;
 }
 
@@ -80,7 +81,8 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1));
+		     : "a"(nr), "b"(p1)
+		     : "memory");
 	return ret;
 }
 
@@ -90,7 +92,8 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1), "c"(p2));
+		     : "a"(nr), "b"(p1), "c"(p2)
+		     : "memory");
 	return ret;
 }
 
@@ -100,7 +103,8 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3));
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+		     : "memory");
 	return ret;
 }
 
@@ -111,7 +115,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
+		     : "memory");
 	return ret;
 }
 
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index f4a5871767f5..4aaa4afb1cb9 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_DEBUG_LOCKING_H
 #define __LINUX_DEBUG_LOCKING_H
 
+#include <linux/kernel.h>
+
 struct task_struct;
 
 extern int debug_locks;
@@ -11,14 +13,6 @@ extern int debug_locks_silent;
  */
 extern int debug_locks_off(void);
 
-/*
- * In the debug case we carry the caller's instruction pointer into
- * other functions, but we dont want the function argument overhead
- * in the nondebug case - hence these macros:
- */
-#define _RET_IP_		(unsigned long)__builtin_return_address(0)
-#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
-
 #define DEBUG_LOCKS_WARN_ON(c)						\
 ({									\
 	int __ret = 0;							\
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9918772bf274..eddb6daadf4a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -189,6 +189,21 @@ static inline void ide_std_init_ports(hw_regs_t *hw,
 	hw->io_ports.ctl_addr = ctl_addr;
 }
 
+/* for IDE PCI controllers in legacy mode, temporary */
+static inline int __ide_default_irq(unsigned long base)
+{
+	switch (base) {
+#ifdef CONFIG_IA64
+	case 0x1f0: return isa_irq_to_vector(14);
+	case 0x170: return isa_irq_to_vector(15);
+#else
+	case 0x1f0: return 14;
+	case 0x170: return 15;
+#endif
+	}
+	return 0;
+}
+
 #include <asm/ide.h>
 
 #if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 792bf0aa779b..2e70006c7fa8 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -46,6 +46,9 @@ extern const char linux_proc_banner[];
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
+#define _RET_IP_		(unsigned long)__builtin_return_address(0)
+#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
+
 #ifdef CONFIG_LBD
 # include <asm/div64.h>
 # define sector_div(a, b) do_div(a, b)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d4998f81e229..1485ca8d0e00 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -79,7 +79,7 @@ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
  *
  * For such cases, we now have a blacklist
  */
-struct kprobe_blackpoint kprobe_blacklist[] = {
+static struct kprobe_blackpoint kprobe_blacklist[] = {
 	{"preempt_schedule",},
 	{NULL}    /* Terminator */
 };
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fb01c32aa3b..e2129e83fd75 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -666,7 +666,7 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu)
 	return retval;
 }
 
-const char printk_recursion_bug_msg [] =
+static const char printk_recursion_bug_msg [] =
 			KERN_CRIT "BUG: recent printk recursion!\n";
 static int printk_recursion_bug;
 
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 5e02b7740702..41d275a81df5 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -925,26 +925,22 @@ void rcu_offline_cpu(int cpu)
 	spin_unlock_irqrestore(&rdp->lock, flags);
 }
 
-void __devinit rcu_online_cpu(int cpu)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
-	cpu_set(cpu, rcu_cpu_online_map);
-	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
-}
-
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
 void rcu_offline_cpu(int cpu)
 {
 }
 
-void __devinit rcu_online_cpu(int cpu)
+#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+
+void __cpuinit rcu_online_cpu(int cpu)
 {
-}
+	unsigned long flags;
 
-#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
+	cpu_set(cpu, rcu_cpu_online_map);
+	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
+}
 
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index c828c2339cc9..a272d78185eb 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -120,6 +120,7 @@ void softlockup_tick(void)
 	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
 			this_cpu, now - touch_timestamp,
 			current->comm, task_pid_nr(current));
+	print_modules();
 	if (regs)
 		show_regs(regs);
 	else
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6021757a4496..1dc2d1d18fa8 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -22,6 +22,8 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/div64.h>
@@ -482,6 +484,89 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 	return buf;
 }
 
+static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags)
+{
+	int len, i;
+
+	if ((unsigned long)s < PAGE_SIZE)
+		s = "<NULL>";
+
+	len = strnlen(s, precision);
+
+	if (!(flags & LEFT)) {
+		while (len < field_width--) {
+			if (buf < end)
+				*buf = ' ';
+			++buf;
+		}
+	}
+	for (i = 0; i < len; ++i) {
+		if (buf < end)
+			*buf = *s;
+		++buf; ++s;
+	}
+	while (len < field_width--) {
+		if (buf < end)
+			*buf = ' ';
+		++buf;
+	}
+	return buf;
+}
+
+static inline void *dereference_function_descriptor(void *ptr)
+{
+#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
+	void *p;
+	if (!probe_kernel_address(ptr, p))
+		ptr = p;
+#endif
+	return ptr;
+}
+
+static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+{
+	unsigned long value = (unsigned long) ptr;
+#ifdef CONFIG_KALLSYMS
+	char sym[KSYM_SYMBOL_LEN];
+	sprint_symbol(sym, value);
+	return string(buf, end, sym, field_width, precision, flags);
+#else
+	field_width = 2*sizeof(void *);
+	flags |= SPECIAL | SMALL | ZEROPAD;
+	return number(buf, end, value, 16, field_width, precision, flags);
+#endif
+}
+
+/*
+ * Show a '%p' thing.  A kernel extension is that the '%p' is followed
+ * by an extra set of alphanumeric characters that are extended format
+ * specifiers.
+ *
+ * Right now we just handle 'F' (for symbolic Function descriptor pointers)
+ * and 'S' (for Symbolic direct pointers), but this can easily be
+ * extended in the future (network address types etc).
+ *
+ * The difference between 'S' and 'F' is that on ia64 and ppc64 function
+ * pointers are really function descriptors, which contain a pointer to the
+ * real address.
+ */
+static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+{
+	switch (*fmt) {
+	case 'F':
+		ptr = dereference_function_descriptor(ptr);
+		/* Fallthrough */
+	case 'S':
+		return symbol_string(buf, end, ptr, field_width, precision, flags);
+	}
+	flags |= SMALL;
+	if (field_width == -1) {
+		field_width = 2*sizeof(void *);
+		flags |= ZEROPAD;
+	}
+	return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
+}
+
 /**
  * vsnprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
@@ -502,11 +587,9 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
  */
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
-	int len;
 	unsigned long long num;
-	int i, base;
+	int base;
 	char *str, *end, c;
-	const char *s;
 
 	int flags;		/* flags to number() */
 
@@ -622,43 +705,18 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 				continue;
 
 			case 's':
-				s = va_arg(args, char *);
-				if ((unsigned long)s < PAGE_SIZE)
-					s = "<NULL>";
-
-				len = strnlen(s, precision);
-
-				if (!(flags & LEFT)) {
-					while (len < field_width--) {
-						if (str < end)
-							*str = ' ';
-						++str;
-					}
-				}
-				for (i = 0; i < len; ++i) {
-					if (str < end)
-						*str = *s;
-					++str; ++s;
-				}
-				while (len < field_width--) {
-					if (str < end)
-						*str = ' ';
-					++str;
-				}
+				str = string(str, end, va_arg(args, char *), field_width, precision, flags);
 				continue;
 
 			case 'p':
-				flags |= SMALL;
-				if (field_width == -1) {
-					field_width = 2*sizeof(void *);
-					flags |= ZEROPAD;
-				}
-				str = number(str, end,
-						(unsigned long) va_arg(args, void *),
-						16, field_width, precision, flags);
+				str = pointer(fmt+1, str, end,
+						va_arg(args, void *),
+						field_width, precision, flags);
+				/* Skip all alphanumeric pointer suffixes */
+				while (isalnum(fmt[1]))
+					fmt++;
 				continue;
 
-
 			case 'n':
 				/* FIXME:
 				* What does C99 say about the overflow case here? */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a37a5034f63d..c94e58b192c3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	} else {
 		*policy = pol == &default_policy ? MPOL_DEFAULT :
 						pol->mode;
-		*policy |= pol->flags;
+		/*
+		 * Internal mempolicy flags must be masked off before exposing
+		 * the policy to userspace.
+		 */
+		*policy |= (pol->flags & MPOL_MODE_FLAGS);
 	}
 
 	if (vma) {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c2397f503b0f..f38cc5317b88 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -442,12 +442,16 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 
 void __exit br_cleanup_bridges(void)
 {
-	struct net_device *dev, *nxt;
+	struct net_device *dev;
 
 	rtnl_lock();
-	for_each_netdev_safe(&init_net, dev, nxt)
-		if (dev->priv_flags & IFF_EBRIDGE)
+restart:
+	for_each_netdev(&init_net, dev) {
+		if (dev->priv_flags & IFF_EBRIDGE) {
 			del_br(dev->priv);
+			goto restart;
+		}
+	}
 	rtnl_unlock();
 
 }
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7e8ca2836452..484bbf6dd032 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -205,12 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
  *  -ENOBUFS on full driver queue (see net_xmit_errno())
  *  -ENOMEM when local loopback failed at calling skb_clone()
  *  -EPERM when trying to send on a non-CAN interface
+ *  -EINVAL when the skb->data does not contain a valid CAN frame
  */
 int can_send(struct sk_buff *skb, int loop)
 {
 	struct sk_buff *newskb = NULL;
+	struct can_frame *cf = (struct can_frame *)skb->data;
 	int err;
 
+	if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
 	if (skb->dev->type != ARPHRD_CAN) {
 		kfree_skb(skb);
 		return -EPERM;
@@ -605,6 +612,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 		   struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct dev_rcv_lists *d;
+	struct can_frame *cf = (struct can_frame *)skb->data;
 	int matches;
 
 	if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) {
@@ -612,6 +620,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 		return 0;
 	}
 
+	BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8);
+
 	/* update statistics */
 	can_stats.rx_frames++;
 	can_stats.rx_frames_delta++;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d9a3a9d13bed..72c2ce904f83 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 
 	if (head->nframes) {
 		/* can_frames starting here */
-		firstframe = (struct can_frame *) skb_tail_pointer(skb);
+		firstframe = (struct can_frame *)skb_tail_pointer(skb);
 
 		memcpy(skb_put(skb, datalen), frames, datalen);
 
@@ -826,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		for (i = 0; i < msg_head->nframes; i++) {
 			err = memcpy_fromiovec((u8 *)&op->frames[i],
 					       msg->msg_iov, CFSIZ);
+
+			if (op->frames[i].can_dlc > 8)
+				err = -EINVAL;
+
 			if (err < 0)
 				return err;
 
@@ -858,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		for (i = 0; i < msg_head->nframes; i++) {
 			err = memcpy_fromiovec((u8 *)&op->frames[i],
 					       msg->msg_iov, CFSIZ);
+
+			if (op->frames[i].can_dlc > 8)
+				err = -EINVAL;
+
 			if (err < 0) {
 				if (op->frames != &op->sframe)
 					kfree(op->frames);
@@ -1164,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 
 	skb->dev = dev;
 	skb->sk  = sk;
-	can_send(skb, 1); /* send with loopback */
+	err = can_send(skb, 1); /* send with loopback */
 	dev_put(dev);
 
+	if (err)
+		return err;
+
 	return CFSIZ + MHSIZ;
 }
 
@@ -1185,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (!bo->bound)
 		return -ENOTCONN;
 
+	/* check for valid message length from userspace */
+	if (size < MHSIZ || (size - MHSIZ) % CFSIZ)
+		return -EINVAL;
+
 	/* check for alternative ifindex for this bcm_op */
 
 	if (!ifindex && msg->msg_name) {
@@ -1259,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
 		break;
 
 	case TX_SEND:
-		/* we need at least one can_frame */
-		if (msg_head.nframes < 1)
+		/* we need exactly one can_frame behind the msg head */
+		if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ))
 			ret = -EINVAL;
 		else
 			ret = bcm_tx_send(msg, ifindex, sk);
diff --git a/net/can/raw.c b/net/can/raw.c
index 69877b8e7e9c..3e46ee36a1aa 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	} else
 		ifindex = ro->ifindex;
 
+	if (size != sizeof(struct can_frame))
+		return -EINVAL;
+
 	dev = dev_get_by_index(&init_net, ifindex);
 	if (!dev)
 		return -ENXIO;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 850825dc86e6..1d723de18686 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -255,6 +255,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/skbuff.h>
+#include <linux/scatterlist.h>
 #include <linux/splice.h>
 #include <linux/net.h>
 #include <linux/socket.h>
@@ -1208,7 +1209,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		return -ENOTCONN;
 	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
 		if (offset < skb->len) {
-			size_t used, len;
+			int used;
+			size_t len;
 
 			len = skb->len - offset;
 			/* Stop reading if we hit a patch of urgent data */
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 0517967a68bf..e6fb21b19b86 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -243,10 +243,10 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
 }
 EXPORT_SYMBOL_GPL(rpcb_getport_sync);
 
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
 {
 	struct rpc_message msg = {
-		.rpc_proc = rpcb_next_version[version].rpc_proc,
+		.rpc_proc = proc,
 		.rpc_argp = map,
 		.rpc_resp = &map->r_port,
 	};
@@ -271,6 +271,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_procinfo *proc;
 	u32 bind_version;
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_clnt	*rpcb_clnt;
@@ -280,7 +281,6 @@ void rpcb_getport_async(struct rpc_task *task)
 	struct sockaddr *sap = (struct sockaddr *)&addr;
 	size_t salen;
 	int status;
-	struct rpcb_info *info;
 
 	dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
 		task->tk_pid, __func__,
@@ -313,10 +313,12 @@ void rpcb_getport_async(struct rpc_task *task)
 	/* Don't ever use rpcbind v2 for AF_INET6 requests */
 	switch (sap->sa_family) {
 	case AF_INET:
-		info = rpcb_next_version;
+		proc = rpcb_next_version[xprt->bind_index].rpc_proc;
+		bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
 		break;
 	case AF_INET6:
-		info = rpcb_next_version6;
+		proc = rpcb_next_version6[xprt->bind_index].rpc_proc;
+		bind_version = rpcb_next_version6[xprt->bind_index].rpc_vers;
 		break;
 	default:
 		status = -EAFNOSUPPORT;
@@ -324,14 +326,13 @@ void rpcb_getport_async(struct rpc_task *task)
 				task->tk_pid, __func__);
 		goto bailout_nofree;
 	}
-	if (info[xprt->bind_index].rpc_proc == NULL) {
+	if (proc == NULL) {
 		xprt->bind_index = 0;
 		status = -EPFNOSUPPORT;
 		dprintk("RPC: %5u %s: no more getport versions available\n",
 			task->tk_pid, __func__);
 		goto bailout_nofree;
 	}
-	bind_version = info[xprt->bind_index].rpc_vers;
 
 	dprintk("RPC: %5u %s: trying rpcbind version %u\n",
 		task->tk_pid, __func__, bind_version);
@@ -361,22 +362,20 @@ void rpcb_getport_async(struct rpc_task *task)
 	map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
 
-	child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
+	child = rpcb_call_async(rpcb_clnt, map, proc);
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
 		status = -EIO;
+		/* rpcb_map_release() has freed the arguments */
 		dprintk("RPC: %5u %s: rpc_run_task failed\n",
 			task->tk_pid, __func__);
-		goto bailout;
+		goto bailout_nofree;
 	}
 	rpc_put_task(child);
 
 	task->tk_xprt->stat.bind_count++;
 	return;
 
-bailout:
-	kfree(map);
-	xprt_put(xprt);
 bailout_nofree:
 	rpcb_wake_rpcbind_waiters(xprt, status);
 bailout_nowake:
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1dcf9f3d1107..44589088941f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -278,7 +278,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
 
 	ent->fields.remote_irr = 0;
 	if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
-		ioapic_deliver(ioapic, gsi);
+		ioapic_service(ioapic, gsi);
 }
 
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)