140 files changed, 8688 insertions, 843 deletions
diff --git a/Documentation/ABI/testing/debugfs-driver-genwqe b/Documentation/ABI/testing/debugfs-driver-genwqe
new file mode 100644
index 000000000000..1c2f25674e8c
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-driver-genwqe
@@ -0,0 +1,91 @@
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/ddcb_info
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    DDCB queue dump used for debugging queueing problems.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/curr_regs
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Dump of the current error registers.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/curr_dbg_uid0
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Internal chip state of UID0 (unit id 0).
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/curr_dbg_uid1
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Internal chip state of UID1.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/curr_dbg_uid2
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Internal chip state of UID2.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/prev_regs
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Dump of the error registers before the last reset of
+                the card occurred.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid0
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Internal chip state of UID0 before card was reset.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid1
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Internal chip state of UID1 before card was reset.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid2
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Internal chip state of UID2 before card was reset.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/info
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Comprehensive summary of bitstream version and software
+                version. Used bitstream and bitstream clocking information.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/err_inject
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Possibility to inject error cases to ensure that the driver's
+                error handling code works well.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/vf<0..14>_jobtimeout_msec
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Default VF timeout 250ms. Testing might require 1000ms.
+                Using 0 will use the card's default value (whatever that is).
+
+                The timeout depends on the max number of available cards
+                in the system and the maximum allowed queue size.
+
+                The driver ensures that the settings are done just before
+                the VFs get enabled. Changing the timeouts in flight is not
+                possible.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/jobtimer
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Dump job timeout register values for PF and VFs.
+                Only available for PF.
+
+What:           /sys/kernel/debug/genwqe/genwqe<n>_card/queue_working_time
+Date:           Dec 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Dump queue working time register values for PF and VFs.
+                Only available for PF.
diff --git a/Documentation/ABI/testing/sysfs-driver-genwqe b/Documentation/ABI/testing/sysfs-driver-genwqe
new file mode 100644
index 000000000000..1870737a1f5e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-genwqe
@@ -0,0 +1,62 @@
+What:           /sys/class/genwqe/genwqe<n>_card/version
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Unique bitstream identification e.g.
+                '0000000330336283.00000000475a4950'.
+
+What:           /sys/class/genwqe/genwqe<n>_card/appid
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Identifies the currently active card application e.g. 'GZIP'
+                for compression/decompression.
+
+What:           /sys/class/genwqe/genwqe<n>_card/type
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Type of the card e.g. 'GenWQE5-A7'.
+
+What:           /sys/class/genwqe/genwqe<n>_card/curr_bitstream
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Currently active bitstream. 1 is default, 0 is backup.
+
+What:           /sys/class/genwqe/genwqe<n>_card/next_bitstream
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Interface to set the next bitstream to be used.
+
+What:           /sys/class/genwqe/genwqe<n>_card/tempsens
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Interface to read the card's temperature sense register.
+
+What:           /sys/class/genwqe/genwqe<n>_card/freerunning_timer
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Interface to read the card's free running timer.
+                Used for performance and utilization measurements.
+
+What:           /sys/class/genwqe/genwqe<n>_card/queue_working_time
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Interface to read queue working time.
+                Used for performance and utilization measurements.
+
+What:           /sys/class/genwqe/genwqe<n>_card/state
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    State of the card: "unused", "used", "error".
+
+What:           /sys/class/genwqe/genwqe<n>_card/base_clock
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Base clock frequency of the card.
+
+What:           /sys/class/genwqe/genwqe<n>_card/device/sriov_numvfs
+Date:           Oct 2013
+Contact:        haver@linux.vnet.ibm.com
+Description:    Enable VFs (1..15):
+                  sudo sh -c 'echo 15 > \
+                    /sys/bus/pci/devices/0000\:1b\:00.0/sriov_numvfs'
+                Disable VFs:
+                  Write a 0 into the same sysfs entry.
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 27faae3e3846..57cf5efb044d 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -112,7 +112,7 @@ required reading:
 
     Other excellent descriptions of how to create patches properly are:
 	"The Perfect Patch"
-		http://kerneltrap.org/node/3737
+		http://www.ozlabs.org/~akpm/stuff/tpp.txt
 	"Linux kernel patch submission format"
 		http://linux.yyz.us/patch-format.html
 
@@ -579,7 +579,7 @@ all time.  It should describe the patch completely, containing:
 For more details on what this should all look like, please see the
 ChangeLog section of the document:
   "The Perfect Patch"
-      http://userweb.kernel.org/~akpm/stuff/tpp.txt
+      http://www.ozlabs.org/~akpm/stuff/tpp.txt
 
 
 
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
new file mode 100644
index 000000000000..b2830b435895
--- /dev/null
+++ b/Documentation/block/null_blk.txt
@@ -0,0 +1,72 @@
+Null block device driver
+================================================================================
+
+I. Overview
+
+The null block device (/dev/nullb*) is used for benchmarking the various
+block-layer implementations. It emulates a block device of X gigabytes in size.
+The following instances are possible:
+
+  Single-queue block-layer
+    - Request-based.
+    - Single submission queue per device.
+    - Implements IO scheduling algorithms (CFQ, Deadline, noop).
+  Multi-queue block-layer
+    - Request-based.
+    - Configurable submission queues per device.
+  No block-layer (Known as bio-based)
+    - Bio-based. IO requests are submitted directly to the device driver.
+    - Directly accepts bio data structure and returns them.
+
+All of them have a completion queue for each core in the system.
+
+II. Module parameters applicable for all instances:
+
+queue_mode=[0-2]: Default: 2-Multi-queue
+  Selects which block-layer the module should instantiate with.
+
+  0: Bio-based.
+  1: Single-queue.
+  2: Multi-queue.
+
+home_node=[0..nr_nodes]: Default: NUMA_NO_NODE
+  Selects what CPU node the data structures are allocated from.
+
+gb=[Size in GB]: Default: 250GB
+  The size of the device reported to the system.
+
+bs=[Block size (in bytes)]: Default: 512 bytes
+  The block size reported to the system.
+
+nr_devices=[Number of devices]: Default: 2
+  Number of block devices instantiated. They are instantiated as /dev/nullb0,
+  etc.
+
+irq_mode=[0-2]: Default: 1-Soft-irq
+  The completion mode used for completing IOs to the block-layer.
+
+  0: None.
+  1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
+     when IOs are issued from a CPU node other than the home node the device
+     is connected to.
+  2: Timer: Waits a specific period (completion_nsec) for each IO before
+     completion.
+
+completion_nsec=[ns]: Default: 10,000ns
+  Combined with irq_mode=2 (timer). The time each completion event must wait.
+
+submit_queues=[0..nr_cpus]:
+  The number of submission queues attached to the device driver. If unset, it
+  defaults to 1 on single-queue and bio-based instances. For multi-queue,
+  it is ignored when use_per_node_hctx module parameter is 1.
+
+hw_queue_depth=[0..qdepth]: Default: 64
+  The hardware queue depth of the device.
+
+III. Multi-queue specific parameters
+
+use_per_node_hctx=[0/1]: Default: 0
+  0: The number of submit queues is set to the value of the submit_queues
+     parameter.
+  1: The multi-queue block layer is instantiated with a hardware dispatch
+     queue for each CPU node in the system.
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt
index 1196290082d1..78530e621a1e 100644
--- a/Documentation/devicetree/bindings/arm/atmel-at91.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt
@@ -20,6 +20,10 @@ TC/TCLIB Timer required properties:
 - interrupts: Should contain all interrupts for the TC block
   Note that you can specify several interrupt cells if the TC
   block has one interrupt per channel.
+- clock-names: tuple listing input clock names.
+	Required elements: "t0_clk"
+	Optional elements: "t1_clk", "t2_clk"
+- clocks: phandles to input clocks.
 
 Examples:
 
@@ -28,6 +32,8 @@ One interrupt per TC block:
 		compatible = "atmel,at91rm9200-tcb";
 		reg = <0xfff7c000 0x100>;
 		interrupts = <18 4>;
+		clocks = <&tcb0_clk>;
+		clock-names = "t0_clk";
 	};
 
 One interrupt per TC channel in a TC block:
@@ -35,6 +41,8 @@ One interrupt per TC channel in a TC block:
 		compatible = "atmel,at91rm9200-tcb";
 		reg = <0xfffdc000 0x100>;
 		interrupts = <26 4 27 4 28 4>;
+		clocks = <&tcb1_clk>;
+		clock-names = "t0_clk";
 	};
 
 RSTC Reset Controller required properties:
diff --git a/Documentation/devicetree/bindings/misc/atmel-ssc.txt b/Documentation/devicetree/bindings/misc/atmel-ssc.txt
index a45ae08c8ed1..60960b2755f4 100644
--- a/Documentation/devicetree/bindings/misc/atmel-ssc.txt
+++ b/Documentation/devicetree/bindings/misc/atmel-ssc.txt
@@ -6,6 +6,9 @@ Required properties:
 	- atmel,at91sam9g45-ssc: support dma transfer
 - reg: Should contain SSC registers location and length
 - interrupts: Should contain SSC interrupt
+- clock-names: tuple listing input clock names.
+	Required elements: "pclk"
+- clocks: phandles to input clocks.
 
 
 Required properties for devices compatible with "atmel,at91sam9g45-ssc":
@@ -20,6 +23,8 @@ ssc0: ssc@fffbc000 {
 	compatible = "atmel,at91rm9200-ssc";
 	reg = <0xfffbc000 0x4000>;
 	interrupts = <14 4 5>;
+	clocks = <&ssc0_clk>;
+	clock-names = "pclk";
 };
 
 - DMA transfer:
diff --git a/Documentation/devicetree/bindings/misc/bmp085.txt b/Documentation/devicetree/bindings/misc/bmp085.txt
index 91dfda2e4e11..d7a6deb6b21e 100644
--- a/Documentation/devicetree/bindings/misc/bmp085.txt
+++ b/Documentation/devicetree/bindings/misc/bmp085.txt
@@ -8,6 +8,8 @@ Optional properties:
 - temp-measurement-period: temperature measurement period (milliseconds)
 - default-oversampling: default oversampling value to be used at startup,
   value range is 0-3 with rising sensitivity.
+- interrupt-parent: should be the phandle for the interrupt controller
+- interrupts: interrupt mapping for IRQ
 
 Example:
 
@@ -17,4 +19,6 @@ pressure@77 {
 	chip-id = <10>;
 	temp-measurement-period = <100>;
 	default-oversampling = <2>;
+	interrupt-parent = <&gpio0>;
+	interrupts = <25 IRQ_TYPE_EDGE_RISING>;
 };
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index 8148a47fc70e..0091a8215ac1 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -149,7 +149,7 @@ linux-api@ver.kernel.org に送ることを勧めます。
      この他にパッチを作る方法についてのよくできた記述は-
 
 	"The Perfect Patch"
-		http://userweb.kernel.org/~akpm/stuff/tpp.txt
+		http://www.ozlabs.org/~akpm/stuff/tpp.txt
 	"Linux kernel patch submission format"
 		http://linux.yyz.us/patch-format.html
 
@@ -622,7 +622,7 @@ Linux カーネルコミュニティは、一度に大量のコードの塊を
 これについて全てがどのようにあるべきかについての詳細は、以下のドキュメ
 ントの ChangeLog セクションを見てください-
   "The Perfect Patch"
-      http://userweb.kernel.org/~akpm/stuff/tpp.txt
+      http://www.ozlabs.org/~akpm/stuff/tpp.txt
 
 これらのどれもが、時にはとても困難です。これらの慣例を完璧に実施するに
 は数年かかるかもしれません。これは継続的な改善のプロセスであり、そのた
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50680a59a2ff..b9e9bd854298 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1529,6 +1529,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 			* atapi_dmadir: Enable ATAPI DMADIR bridge support
 
+			* disable: Disable this device.
+
 			If there are multiple matching configurations changing
 			the same attribute, the last one is used.
 
diff --git a/Documentation/ko_KR/HOWTO b/Documentation/ko_KR/HOWTO
index 680e64635958..dc2ff8f611e0 100644
--- a/Documentation/ko_KR/HOWTO
+++ b/Documentation/ko_KR/HOWTO
@@ -122,7 +122,7 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
 
     올바른 패치들을 만드는 법에 관한 훌륭한 다른 문서들이 있다.
     "The Perfect Patch"
-        http://userweb.kernel.org/~akpm/stuff/tpp.txt
+        http://www.ozlabs.org/~akpm/stuff/tpp.txt
     "Linux kernel patch submission format"
         http://linux.yyz.us/patch-format.html
 
@@ -213,7 +213,7 @@ Documentation/DocBook/ 디렉토리 내에서 만들어지며 PDF, Postscript, H
 것은 Linux Cross-Reference project이며 그것은 자기 참조 방식이며
 소스코드를 인덱스된 웹 페이지들의 형태로 보여준다. 최신의 멋진 커널
 코드 저장소는 다음을 통하여 참조할 수 있다.
-      http://users.sosdg.org/~qiyong/lxr/
+      http://lxr.linux.no/+trees
 
 
 개발 프로세스
@@ -222,20 +222,20 @@ Documentation/DocBook/ 디렉토리 내에서 만들어지며 PDF, Postscript, H
 리눅스 커널 개발 프로세스는 현재 몇몇 다른 메인 커널 "브랜치들"과
 서브시스템에 특화된 커널 브랜치들로 구성된다. 몇몇 다른 메인
 브랜치들은 다음과 같다.
-  - main 2.6.x 커널 트리
-  - 2.6.x.y - 안정된 커널 트리
-  - 2.6.x -git 커널 패치들
-  - 2.6.x -mm 커널 패치들
+  - main 3.x 커널 트리
+  - 3.x.y - 안정된 커널 트리
+  - 3.x -git 커널 패치들
   - 서브시스템을 위한 커널 트리들과 패치들
+  - 3.x - 통합 테스트를 위한 next 커널 트리
 
-2.6.x 커널 트리
+3.x 커널 트리
 ---------------
 
-2.6.x 커널들은 Linux Torvalds가 관리하며 kernel.org의 pub/linux/kernel/v2.6/
+3.x 커널들은 Linus Torvalds가 관리하며 kernel.org의 pub/linux/kernel/v3.x/
 디렉토리에서 참조될 수 있다.개발 프로세스는 다음과 같다.
   - 새로운 커널이 배포되자마자 2주의 시간이 주어진다. 이 기간동은
     메인테이너들은 큰 diff들을 Linus에게 제출할 수 있다. 대개 이 패치들은
-    몇 주 동안 -mm 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는 데
+    몇 주 동안 -next 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는 데
     선호되는 방법은  git(커널의 소스 관리 툴, 더 많은 정보들은 http://git.or.cz/
     에서 참조할 수 있다)를 사용하는 것이지만 순수한 패치파일의 형식으로 보내는
     것도 무관하다.
@@ -262,20 +262,20 @@ Andrew Morton의 글이 있다.
          버그의 상황에 따라 배포되는 것이지 미리정해 놓은 시간에 따라
          배포되는 것은 아니기 때문이다."
 
-2.6.x.y - 안정 커널 트리
+3.x.y - 안정 커널 트리
 ------------------------
 
-4 자리 숫자로 이루어진 버젼의 커널들은 -stable 커널들이다. 그것들은 2.6.x
+3 자리 숫자로 이루어진 버젼의 커널들은 -stable 커널들이다. 그것들은 3.x
 커널에서 발견된 큰 회귀들이나 보안 문제들 중 비교적 작고 중요한 수정들을
 포함한다.
 
 이것은 가장 최근의 안정적인 커널을 원하는 사용자에게 추천되는 브랜치이며,
 개발/실험적 버젼을 테스트하는 것을 돕고자 하는 사용자들과는 별로 관련이 없다.
 
-어떤 2.6.x.y 커널도 사용할 수 없다면 그때는 가장 높은 숫자의 2.6.x
+어떤 3.x.y 커널도 사용할 수 없다면 그때는 가장 높은 숫자의 3.x
 커널이 현재의 안정 커널이다.
 
-2.6.x.y는 "stable" 팀<stable@kernel.org>에 의해 관리되며 거의 매번 격주로
+3.x.y는 "stable" 팀<stable@vger.kernel.org>에 의해 관리되며 거의 매번 격주로
 배포된다.
 
 커널 트리 문서들 내에 Documentation/stable_kernel_rules.txt 파일은 어떤
@@ -283,84 +283,46 @@ Andrew Morton의 글이 있다.
 진행되는지를 설명한다.
 
 
-2.6.x -git 패치들
+3.x -git 패치들
 ------------------
 git 저장소(그러므로 -git이라는 이름이 붙음)에는 날마다 관리되는 Linus의
 커널 트리의 snapshot 들이 있다. 이 패치들은 일반적으로 날마다 배포되며
 Linus의 트리의 현재 상태를 나타낸다. 이 패치들은 정상적인지 조금도
 살펴보지 않고 자동적으로 생성된 것이므로 -rc 커널들 보다도 더 실험적이다.
 
-2.6.x -mm 커널 패치들
----------------------
-Andrew Morton에 의해 배포된 실험적인 커널 패치들이다. Andrew는 모든 다른
-서브시스템 커널 트리와 패치들을 가져와서 리눅스 커널 메일링 리스트로
-온 많은 패치들과 한데 묶는다. 이 트리는 새로운 기능들과 패치들을 위한
-장소를 제공하는 역할을 한다. 하나의 패치가 -mm에 한동안 있으면서 그 가치가
-증명되게 되면 Andrew나 서브시스템 메인테이너는 그것을 메인라인에 포함시키기
-위하여 Linus에게 보낸다.
-
-커널 트리에 포함하고 싶은 모든 새로운 패치들은 Linus에게 보내지기 전에
--mm 트리에서 테스트를 하는 것을 적극 추천한다.
-
-이 커널들은 안정되게 사용할 시스템에서에 실행하는 것은 적합하지 않으며
-다른 브랜치들의 어떤 것들보다 위험하다.
-
-여러분이 커널 개발 프로세스를 돕길 원한다면 이 커널 배포들을 사용하고
-테스트한 후 어떤 문제를 발견하거나 또는 모든 것이 잘 동작한다면 리눅스
-커널 메일링 리스트로 피드백을 해달라.
-
-이 커널들은 일반적으로 모든 다른 실험적인 패치들과 배포될 당시의
-사용가능한 메인라인 -git 커널들의 몇몇 변경을 포함한다.
-
--mm 커널들은 정해진 일정대로 배포되지 않는다. 하지만 대개 몇몇 -mm 커널들은
-각 -rc 커널(1부터 3이 흔함) 사이에서 배포된다.
-
 서브시스템 커널 트리들과 패치들
 -------------------------------
-많은 다른 커널 서브시스템 개발자들은 커널의 다른 부분들에서 무슨 일이
-일어나고 있는지를 볼수 있도록 그들의 개발 트리를 공개한다. 이 트리들은
-위에서 설명하였던 것 처럼 -mm 커널 배포들로 합쳐진다.
-
-다음은  활용가능한 커널 트리들을 나열한다.
-  git trees:
-    - Kbuild development tree, Sam Ravnborg < sam@ravnborg.org>
-    git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
-
-    - ACPI development tree, Len Brown <len.brown@intel.com >
-    git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
-
-    - Block development tree, Jens Axboe <jens.axboe@oracle.com>
-    git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
-
-    - DRM development tree, Dave Airlie <airlied@linux.ie>
-    git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
-
-    - ia64 development tree, Tony Luck < tony.luck@intel.com>
-    git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
-
-    - infiniband, Roland Dreier <rolandd@cisco.com >
-    git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
-
-    - libata, Jeff Garzik <jgarzik@pobox.com>
-    git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
-
-    - network drivers, Jeff Garzik <jgarzik@pobox.com>
-    git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
-
-    - pcmcia, Dominik Brodowski < linux@dominikbrodowski.net>
-    git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
-
-    - SCSI, James Bottomley < James.Bottomley@SteelEye.com>
-    git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
-
-  quilt trees:
-    - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman < gregkh@linuxfoundation.org>
-    kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
-    - x86-64, partly i386, Andi Kleen < ak@suse.de>
-        ftp.firstfloor.org:/pub/ak/x86_64/quilt/
-
-  다른 커널 트리들은 http://kernel.org/git와 MAINTAINERS 파일에서 참조할 수
-  있다.
+다양한 커널 서브시스템의 메인테이너들 --- 그리고 많은 커널 서브시스템 개발자들
+--- 은 그들의 현재 개발 상태를 소스 저장소로 노출한다. 이를 통해 다른 사람들도
+커널의 다른 영역에 어떤 변화가 이루어지고 있는지 알 수 있다. 급속히 개발이
+진행되는 영역이 있고 그렇지 않은 영역이 있으므로, 개발자는 다른 개발자가 제출한
+수정 사항과 자신의 수정사항의 충돌이나 동일한 일을 동시에 두사람이 따로
+진행하는 사태를 방지하기 위해 급속히 개발이 진행되고 있는 영역에 작업의
+베이스를 맞춰줄 것이 요구된다.
+
+대부분의 이러한 저장소는 git 트리지만, git이 아닌 SCM으로 관리되거나, quilt
+시리즈로 제공되는 패치들도 존재한다. 이러한 서브시스템 저장소들은 MAINTAINERS
+파일에 나열되어 있다. 대부분은 http://git.kernel.org 에서 볼 수 있다.
+
+제안된 패치는 서브시스템 트리에 커밋되기 전에 메일링 리스트를 통해
+리뷰된다(아래의 관련 섹션을 참고하기 바란다). 일부 커널 서브시스템의 경우, 이
+리뷰 프로세스는 patchwork라는 도구를 통해 추적된다. patchwork은 등록된 패치와
+패치에 대한 코멘트, 패치의 버전을 볼 수 있는 웹 인터페이스를 제공하고,
+메인테이너는 패치를 리뷰 중, 리뷰 통과, 또는 반려됨으로 표시할 수 있다.
+대부분의 이러한 patchwork 사이트는 http://patchwork.kernel.org/ 또는
+http://patchwork.ozlabs.org/ 에 나열되어 있다.
+
+3.x - 통합 테스트를 위한 next 커널 트리
+-----------------------------------------
+서브시스템 트리들의 변경사항들은 mainline 3.x 트리로 들어오기 전에 통합
+테스트를 거쳐야 한다. 이런 목적으로, 모든 서브시스템 트리의 변경사항을 거의
+매일 받아가는 특수한 테스트 저장소가 존재한다:
+       http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
+       http://linux.f-seidel.de/linux-next/pmwiki/
+
+이런 식으로, -next 커널을 통해 다음 머지 기간에 메인라인 커널에 어떤 변경이
+가해질 것인지 간략히 알 수 있다. 모험심 강한 테스터라면 -next 커널에서 테스트를
+수행하는 것도 좋을 것이다.
 
 버그 보고
 ---------
@@ -597,7 +559,7 @@ Pat이라는 이름을 가진 여자가 있을 수도 있는 것이다. 리눅스
 
 이것이 무엇인지 더 자세한 것을 알고 싶다면 다음 문서의 ChageLog 항을 봐라.
    "The Perfect Patch"
-    http://userweb.kernel.org/~akpm/stuff/tpp.txt
+    http://www.ozlabs.org/~akpm/stuff/tpp.txt
 
 
 
diff --git a/Documentation/misc-devices/mei/mei-amt-version.c b/Documentation/misc-devices/mei/mei-amt-version.c
index 49e4f770864a..57d0d871dcf7 100644
--- a/Documentation/misc-devices/mei/mei-amt-version.c
+++ b/Documentation/misc-devices/mei/mei-amt-version.c
@@ -115,8 +115,6 @@ static bool mei_init(struct mei *me, const uuid_le *guid,
 	struct mei_client *cl;
 	struct mei_connect_client_data data;
 
-	mei_deinit(me);
-
 	me->verbose = verbose;
 
 	me->fd = open("/dev/mei", O_RDWR);
diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO
index 7fba5aab9ef9..6c914aa87e71 100644
--- a/Documentation/zh_CN/HOWTO
+++ b/Documentation/zh_CN/HOWTO
@@ -112,7 +112,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与
 
     其他关于如何正确地生成补丁的优秀文档包括：
     "The Perfect Patch"
-        http://userweb.kernel.org/~akpm/stuff/tpp.txt
+        http://www.ozlabs.org/~akpm/stuff/tpp.txt
     "Linux kernel patch submission format"
         http://linux.yyz.us/patch-format.html
 
@@ -515,7 +515,7 @@ Linux内核社区并不喜欢一下接收大段的代码。修改需要被恰当
 
 想了解它具体应该看起来像什么，请查阅以下文档中的“ChangeLog”章节：
   “The Perfect Patch”
-  	 http://userweb.kernel.org/~akpm/stuff/tpp.txt
+  	 http://www.ozlabs.org/~akpm/stuff/tpp.txt
 
 
 这些事情有时候做起来很难。要在任何方面都做到完美可能需要好几年时间。这是
diff --git a/MAINTAINERS b/MAINTAINERS
index d5e4ff328cc7..0a1475b6b358 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2616,7 +2616,7 @@ S:	Maintained
 F:	drivers/platform/x86/dell-laptop.c
 
 DELL LAPTOP SMM DRIVER
-S:	Orphan
+M:	Guenter Roeck <linux@roeck-us.net>
 F:	drivers/char/i8k.c
 F:	include/uapi/linux/i8k.h
 
diff --git a/Makefile b/Makefile
index 14d592cbbc5f..ab80be7a38bc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = One Giant Leap for Frogkind
 
 # *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index 46e1d7ef163f..9987dd0e9c59 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -241,7 +241,7 @@
 
 	sdhi0: sdhi@ee100000 {
 		compatible = "renesas,sdhi-r8a7790";
-		reg = <0 0xee100000 0 0x100>;
+		reg = <0 0xee100000 0 0x200>;
 		interrupt-parent = <&gic>;
 		interrupts = <0 165 4>;
 		cap-sd-highspeed;
@@ -250,7 +250,7 @@
 
 	sdhi1: sdhi@ee120000 {
 		compatible = "renesas,sdhi-r8a7790";
-		reg = <0 0xee120000 0 0x100>;
+		reg = <0 0xee120000 0 0x200>;
 		interrupt-parent = <&gic>;
 		interrupts = <0 166 4>;
 		cap-sd-highspeed;
diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index 4ec8d82b0492..44a59c3abfb0 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -242,12 +242,18 @@ static void __init ldp_display_init(void)
 
 static int ldp_twl_gpio_setup(struct device *dev, unsigned gpio, unsigned ngpio)
 {
+	int res;
+
 	/* LCD enable GPIO */
 	ldp_lcd_pdata.enable_gpio = gpio + 7;
 
 	/* Backlight enable GPIO */
 	ldp_lcd_pdata.backlight_gpio = gpio + 15;
 
+	res = platform_device_register(&ldp_lcd_device);
+	if (res)
+		pr_err("Unable to register LCD: %d\n", res);
+
 	return 0;
 }
 
@@ -346,7 +352,6 @@ static struct omap2_hsmmc_info mmc[] __initdata = {
 
 static struct platform_device *ldp_devices[] __initdata = {
 	&ldp_gpio_keys_device,
-	&ldp_lcd_device,
 };
 
 #ifdef CONFIG_OMAP_MUX
diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
index 56cebb05509e..d23c77fadb31 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
@@ -796,7 +796,7 @@ struct omap_hwmod omap2xxx_counter_32k_hwmod = {
 
 /* gpmc */
 static struct omap_hwmod_irq_info omap2xxx_gpmc_irqs[] = {
-	{ .irq = 20 },
+	{ .irq = 20 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
@@ -841,7 +841,7 @@ static struct omap_hwmod_class omap2_rng_hwmod_class = {
 };
 
 static struct omap_hwmod_irq_info omap2_rng_mpu_irqs[] = {
-	{ .irq = 52 },
+	{ .irq = 52 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index d33742908f97..4c3b1e6df508 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -2165,7 +2165,7 @@ static struct omap_hwmod_class omap3xxx_gpmc_hwmod_class = {
 };
 
 static struct omap_hwmod_irq_info omap3xxx_gpmc_irqs[] = {
-	{ .irq = 20 },
+	{ .irq = 20 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
@@ -2999,7 +2999,7 @@ static struct omap_mmu_dev_attr mmu_isp_dev_attr = {
 
 static struct omap_hwmod omap3xxx_mmu_isp_hwmod;
 static struct omap_hwmod_irq_info omap3xxx_mmu_isp_irqs[] = {
-	{ .irq = 24 },
+	{ .irq = 24 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
@@ -3041,7 +3041,7 @@ static struct omap_mmu_dev_attr mmu_iva_dev_attr = {
 
 static struct omap_hwmod omap3xxx_mmu_iva_hwmod;
 static struct omap_hwmod_irq_info omap3xxx_mmu_iva_irqs[] = {
-	{ .irq = 28 },
+	{ .irq = 28 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index db32d5380b11..18f333c440db 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -1637,7 +1637,7 @@ static struct omap_hwmod dra7xx_uart1_hwmod = {
 	.class		= &dra7xx_uart_hwmod_class,
 	.clkdm_name	= "l4per_clkdm",
 	.main_clk	= "uart1_gfclk_mux",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= HWMOD_SWSUP_SIDLE_ACT | DEBUG_OMAP2UART1_FLAGS,
 	.prcm = {
 		.omap4 = {
 			.clkctrl_offs = DRA7XX_CM_L4PER_UART1_CLKCTRL_OFFSET,
diff --git a/arch/arm/mach-pxa/include/mach/lubbock.h b/arch/arm/mach-pxa/include/mach/lubbock.h
index 2a086e8373eb..958cd6af9384 100644
--- a/arch/arm/mach-pxa/include/mach/lubbock.h
+++ b/arch/arm/mach-pxa/include/mach/lubbock.h
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <mach/irqs.h>
+
 #define LUBBOCK_ETH_PHYS	PXA_CS3_PHYS
 
 #define LUBBOCK_FPGA_PHYS	PXA_CS2_PHYS
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 958e3cbf0ac2..c18689123023 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -614,6 +614,11 @@ static struct regulator_consumer_supply fixed3v3_power_consumers[] = {
 	REGULATOR_SUPPLY("vqmmc", "sh_mmcif"),
 };
 
+/* Fixed 5.0V regulator used by LCD backlight */
+static struct regulator_consumer_supply fixed5v0_power_consumers[] = {
+	REGULATOR_SUPPLY("power", "pwm-backlight.0"),
+};
+
 /* Fixed 3.3V regulator to be used by SDHI0 */
 static struct regulator_consumer_supply vcc_sdhi0_consumers[] = {
 	REGULATOR_SUPPLY("vmmc", "sh_mobile_sdhi.0"),
@@ -1196,6 +1201,8 @@ static void __init eva_init(void)
 
 	regulator_register_always_on(0, "fixed-3.3V", fixed3v3_power_consumers,
 				     ARRAY_SIZE(fixed3v3_power_consumers), 3300000);
+	regulator_register_always_on(3, "fixed-5.0V", fixed5v0_power_consumers,
+				     ARRAY_SIZE(fixed5v0_power_consumers), 5000000);
 
 	pinctrl_register_mappings(eva_pinctrl_map, ARRAY_SIZE(eva_pinctrl_map));
 	pwm_add_table(pwm_lookup, ARRAY_SIZE(pwm_lookup));
diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c
index 38611526fe9a..3c4995aebd22 100644
--- a/arch/arm/mach-shmobile/board-bockw.c
+++ b/arch/arm/mach-shmobile/board-bockw.c
@@ -679,7 +679,7 @@ static void __init bockw_init(void)
 			.id             = i,
 			.data           = &rsnd_card_info[i],
 			.size_data      = sizeof(struct asoc_simple_card_info),
-			.dma_mask       = ~0,
+			.dma_mask	= DMA_BIT_MASK(32),
 		};
 
 		platform_device_register_full(&cardinfo);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index dc1ec0dff939..ea04b342c026 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -387,7 +387,8 @@ static void init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_PEBS);
 	}
 
-	if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
+	if (c->x86 == 6 && cpu_has_clflush &&
+	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
 		set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
 
 #ifdef CONFIG_X86_64
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ba6cf8e9aa0a..b91ce75bd35d 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -335,9 +335,22 @@ static struct kobj_type blk_mq_hw_ktype = {
 void blk_mq_unregister_disk(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	int i, j;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx_for_each_ctx(hctx, ctx, j) {
+			kobject_del(&ctx->kobj);
+			kobject_put(&ctx->kobj);
+		}
+		kobject_del(&hctx->kobj);
+		kobject_put(&hctx->kobj);
+	}
 
 	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(&q->mq_kobj);
+	kobject_put(&q->mq_kobj);
 
 	kobject_put(&disk_to_dev(disk)->kobj);
 }
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 5d9248526d78..4770de5707b9 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -348,7 +348,6 @@ source "drivers/acpi/apei/Kconfig"
 config ACPI_EXTLOG
 	tristate "Extended Error Log support"
 	depends on X86_MCE && X86_LOCAL_APIC
-	select EFI
 	select UEFI_CPER
 	default n
 	help
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 6745fe137b9e..e60390597372 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -162,6 +162,7 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
 	{ "80860F14", (unsigned long)&byt_sdio_dev_desc },
 	{ "80860F41", (unsigned long)&byt_i2c_dev_desc },
 	{ "INT33B2", },
+	{ "INT33FC", },
 
 	{ "INT3430", (unsigned long)&lpt_dev_desc },
 	{ "INT3431", (unsigned long)&lpt_dev_desc },
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 786294bb682c..3650b2183227 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -2,7 +2,6 @@ config ACPI_APEI
 	bool "ACPI Platform Error Interface (APEI)"
 	select MISC_FILESYSTEMS
 	select PSTORE
-	select EFI
 	select UEFI_CPER
 	depends on X86
 	help
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 14f1e9506338..c0ed4f273cf2 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1238,15 +1238,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		return rc;
 
-	/* AHCI controllers often implement SFF compatible interface.
-	 * Grab all PCI BARs just in case.
-	 */
-	rc = pcim_iomap_regions_request_all(pdev, 1 << ahci_pci_bar, DRV_NAME);
-	if (rc == -EBUSY)
-		pcim_pin_device(pdev);
-	if (rc)
-		return rc;
-
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
 	    (pdev->device == 0x2652 || pdev->device == 0x2653)) {
 		u8 map;
@@ -1263,6 +1254,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		}
 	}
 
+	/* AHCI controllers often implement SFF compatible interface.
+	 * Grab all PCI BARs just in case.
+	 */
+	rc = pcim_iomap_regions_request_all(pdev, 1 << ahci_pci_bar, DRV_NAME);
+	if (rc == -EBUSY)
+		pcim_pin_device(pdev);
+	if (rc)
+		return rc;
+
 	hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
 	if (!hpriv)
 		return -ENOMEM;
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index ae2d73fe321e..3e23e9941dad 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -113,7 +113,7 @@ static int imx6q_sata_init(struct device *dev, void __iomem *mmio)
 	/*
 	 * set PHY Paremeters, two steps to configure the GPR13,
 	 * one write for rest of parameters, mask of first write
-	 * is 0x07fffffd, and the other one write for setting
+	 * is 0x07ffffff, and the other one write for setting
 	 * the mpll_clk_en.
 	 */
 	regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK
@@ -124,6 +124,7 @@ static int imx6q_sata_init(struct device *dev, void __iomem *mmio)
 			| IMX6Q_GPR13_SATA_TX_ATTEN_MASK
 			| IMX6Q_GPR13_SATA_TX_BOOST_MASK
 			| IMX6Q_GPR13_SATA_TX_LVL_MASK
+			| IMX6Q_GPR13_SATA_MPLL_CLK_EN
 			| IMX6Q_GPR13_SATA_TX_EDGE_RATE
 			, IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB
 			| IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 75b93678bbcd..1393a5890ed5 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2149,9 +2149,16 @@ static int ata_dev_config_ncq(struct ata_device *dev,
 				    "failed to get NCQ Send/Recv Log Emask 0x%x\n",
 				    err_mask);
 		} else {
+			u8 *cmds = dev->ncq_send_recv_cmds;
+
 			dev->flags |= ATA_DFLAG_NCQ_SEND_RECV;
-			memcpy(dev->ncq_send_recv_cmds, ap->sector_buf,
-				ATA_LOG_NCQ_SEND_RECV_SIZE);
+			memcpy(cmds, ap->sector_buf, ATA_LOG_NCQ_SEND_RECV_SIZE);
+
+			if (dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) {
+				ata_dev_dbg(dev, "disabling queued TRIM support\n");
+				cmds[ATA_LOG_NCQ_SEND_RECV_DSM_OFFSET] &=
+					~ATA_LOG_NCQ_SEND_RECV_DSM_TRIM;
+			}
 		}
 	}
 
@@ -4156,6 +4163,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "ST3320[68]13AS",	"SD1[5-9]",	ATA_HORKAGE_NONCQ |
 						ATA_HORKAGE_FIRMWARE_WARN },
 
+	/* Seagate Momentus SpinPoint M8 seem to have FPDMA_AA issues */
+	{ "ST1000LM024 HN-M101MBB", "2AR10001",	ATA_HORKAGE_BROKEN_FPDMA_AA },
+
 	/* Blacklist entries taken from Silicon Image 3124/3132
 	   Windows driver .inf file - also several Linux problem reports */
 	{ "HTS541060G9SA00",    "MB3OC60D",     ATA_HORKAGE_NONCQ, },
@@ -4202,6 +4212,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "PIONEER DVD-RW  DVR-212D",	NULL,	ATA_HORKAGE_NOSETXFER },
 	{ "PIONEER DVD-RW  DVR-216D",	NULL,	ATA_HORKAGE_NOSETXFER },
 
+	/* devices that don't properly handle queued TRIM commands */
+	{ "Micron_M500*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
+	{ "Crucial_CT???M500SSD1",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
+
 	/* End Marker */
 	{ }
 };
@@ -6519,6 +6533,7 @@ static int __init ata_parse_force_one(char **cur,
 		{ "norst",	.lflags		= ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST },
 		{ "rstonce",	.lflags		= ATA_LFLAG_RST_ONCE },
 		{ "atapi_dmadir", .horkage_on	= ATA_HORKAGE_ATAPI_DMADIR },
+		{ "disable",	.horkage_on	= ATA_HORKAGE_DISABLE },
 	};
 	char *start = *cur, *p = *cur;
 	char *id, *val, *endp;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index ab58556d347c..377eb889f555 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3872,6 +3872,27 @@ void ata_scsi_hotplug(struct work_struct *work)
 		return;
 	}
 
+	/*
+	 * XXX - UGLY HACK
+	 *
+	 * The block layer suspend/resume path is fundamentally broken due
+	 * to freezable kthreads and workqueue and may deadlock if a block
+	 * device gets removed while resume is in progress.  I don't know
+	 * what the solution is short of removing freezable kthreads and
+	 * workqueues altogether.
+	 *
+	 * The following is an ugly hack to avoid kicking off device
+	 * removal while freezer is active.  This is a joke but does avoid
+	 * this particular deadlock scenario.
+	 *
+	 * https://bugzilla.kernel.org/show_bug.cgi?id=62801
+	 * http://marc.info/?l=linux-kernel&m=138695698516487
+	 */
+#ifdef CONFIG_FREEZER
+	while (pm_freezing)
+		msleep(10);
+#endif
+
 	DPRINTK("ENTER\n");
 	mutex_lock(&ap->scsi_scan_mutex);
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index f370fc13aea5..a2e69d26266d 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1,4 +1,5 @@
 #include <linux/module.h>
+
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
@@ -65,7 +66,7 @@ enum {
 	NULL_Q_MQ		= 2,
 };
 
-static int submit_queues = 1;
+static int submit_queues;
 module_param(submit_queues, int, S_IRUGO);
 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
 
@@ -101,9 +102,9 @@ static int hw_queue_depth = 64;
 module_param(hw_queue_depth, int, S_IRUGO);
 MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
 
-static bool use_per_node_hctx = true;
+static bool use_per_node_hctx = false;
 module_param(use_per_node_hctx, bool, S_IRUGO);
-MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
+MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
 
 static void put_tag(struct nullb_queue *nq, unsigned int tag)
 {
@@ -346,8 +347,37 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
 
 static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
 {
-	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
-				hctx_index);
+	int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
+	int tip = (reg->nr_hw_queues % nr_online_nodes);
+	int node = 0, i, n;
+
+	/*
+	 * Split submit queues evenly wrt the number of nodes. If uneven,
+	 * fill the first buckets with one extra, until the rest is filled with
+	 * no extra.
+	 */
+	for (i = 0, n = 1; i < hctx_index; i++, n++) {
+		if (n % b_size == 0) {
+			n = 0;
+			node++;
+
+			tip--;
+			if (!tip)
+				b_size = reg->nr_hw_queues / nr_online_nodes;
+		}
+	}
+
+	/*
+	 * A node might not be online, therefore map the relative node id to the
+	 * real node id.
+	 */
+	for_each_online_node(n) {
+		if (!node)
+			break;
+		node--;
+	}
+
+	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
 }
 
 static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
@@ -355,16 +385,24 @@ static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
 	kfree(hctx);
 }
 
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+	BUG_ON(!nullb);
+	BUG_ON(!nq);
+
+	init_waitqueue_head(&nq->wait);
+	nq->queue_depth = nullb->queue_depth;
+}
+
 static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 			  unsigned int index)
 {
 	struct nullb *nullb = data;
 	struct nullb_queue *nq = &nullb->queues[index];
 
-	init_waitqueue_head(&nq->wait);
-	nq->queue_depth = nullb->queue_depth;
-	nullb->nr_queues++;
 	hctx->driver_data = nq;
+	null_init_queue(nullb, nq);
+	nullb->nr_queues++;
 
 	return 0;
 }
@@ -417,13 +455,13 @@ static int setup_commands(struct nullb_queue *nq)
 
 	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
 	if (!nq->cmds)
-		return 1;
+		return -ENOMEM;
 
 	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
 	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
 	if (!nq->tag_map) {
 		kfree(nq->cmds);
-		return 1;
+		return -ENOMEM;
 	}
 
 	for (i = 0; i < nq->queue_depth; i++) {
@@ -454,33 +492,37 @@ static void cleanup_queues(struct nullb *nullb)
 
 static int setup_queues(struct nullb *nullb)
 {
-	struct nullb_queue *nq;
-	int i;
-
-	nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
+	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
+								GFP_KERNEL);
 	if (!nullb->queues)
-		return 1;
+		return -ENOMEM;
 
 	nullb->nr_queues = 0;
 	nullb->queue_depth = hw_queue_depth;
 
-	if (queue_mode == NULL_Q_MQ)
-		return 0;
+	return 0;
+}
+
+static int init_driver_queues(struct nullb *nullb)
+{
+	struct nullb_queue *nq;
+	int i, ret = 0;
 
 	for (i = 0; i < submit_queues; i++) {
 		nq = &nullb->queues[i];
-		init_waitqueue_head(&nq->wait);
-		nq->queue_depth = hw_queue_depth;
-		if (setup_commands(nq))
-			break;
+
+		null_init_queue(nullb, nq);
+
+		ret = setup_commands(nq);
+		if (ret)
+			goto err_queue;
 		nullb->nr_queues++;
 	}
 
-	if (i == submit_queues)
-		return 0;
-
+	return 0;
+err_queue:
 	cleanup_queues(nullb);
-	return 1;
+	return ret;
 }
 
 static int null_add_dev(void)
@@ -518,11 +560,13 @@ static int null_add_dev(void)
 	} else if (queue_mode == NULL_Q_BIO) {
 		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
 		blk_queue_make_request(nullb->q, null_queue_bio);
+		init_driver_queues(nullb);
 	} else {
 		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
 		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
 		if (nullb->q)
 			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
+		init_driver_queues(nullb);
 	}
 
 	if (!nullb->q)
@@ -579,7 +623,13 @@ static int __init null_init(void)
 	}
 #endif
 
-	if (submit_queues > nr_cpu_ids)
+	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
+		if (submit_queues < nr_online_nodes) {
+			pr_warn("null_blk: submit_queues param is set to %u.",
+							nr_online_nodes);
+			submit_queues = nr_online_nodes;
+		}
+	} else if (submit_queues > nr_cpu_ids)
 		submit_queues = nr_cpu_ids;
 	else if (!submit_queues)
 		submit_queues = 1;
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 9199c93be926..eb6e1e0e8db2 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -5269,7 +5269,7 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
 	}
 }
 
-const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
+static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
 {
 	switch (state) {
 	case SKD_MSG_STATE_IDLE:
@@ -5281,7 +5281,7 @@ const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
 	}
 }
 
-const char *skd_skreq_state_to_str(enum skd_req_state state)
+static const char *skd_skreq_state_to_str(enum skd_req_state state)
 {
 	switch (state) {
 	case SKD_REQ_STATE_IDLE:
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index d79d692d05b8..896413b59aae 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -735,7 +735,7 @@ static struct pci_device_id agp_amd64_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table);
 
-static DEFINE_PCI_DEVICE_TABLE(agp_amd64_pci_promisc_table) = {
+static const struct pci_device_id agp_amd64_pci_promisc_table[] = {
 	{ PCI_DEVICE_CLASS(0, 0) },
 	{ }
 };
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index e6939e13e338..e210f858d3cb 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -1,12 +1,11 @@
 /*
  * i8k.c -- Linux driver for accessing the SMM BIOS on Dell laptops.
- *	    See http://www.debian.org/~dz/i8k/ for more information
- *	    and for latest version of this driver.
  *
  * Copyright (C) 2001  Massimo Dal Zotto <dz@debian.org>
  *
  * Hwmon integration:
  * Copyright (C) 2011  Jean Delvare <khali@linux-fr.org>
+ * Copyright (C) 2013  Guenter Roeck <linux@roeck-us.net>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -19,6 +18,8 @@
  * General Public License for more details.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -29,13 +30,12 @@
 #include <linux/mutex.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/sched.h>
 
 #include <linux/i8k.h>
 
-#define I8K_VERSION		"1.14 21/02/2005"
-
 #define I8K_SMM_FN_STATUS	0x0025
 #define I8K_SMM_POWER_STATUS	0x0069
 #define I8K_SMM_SET_FAN		0x01a3
@@ -44,7 +44,6 @@
 #define I8K_SMM_GET_TEMP	0x10a3
 #define I8K_SMM_GET_DELL_SIG1	0xfea3
 #define I8K_SMM_GET_DELL_SIG2	0xffa3
-#define I8K_SMM_BIOS_VERSION	0x00a6
 
 #define I8K_FAN_MULT		30
 #define I8K_MAX_TEMP		127
@@ -64,6 +63,15 @@
 static DEFINE_MUTEX(i8k_mutex);
 static char bios_version[4];
 static struct device *i8k_hwmon_dev;
+static u32 i8k_hwmon_flags;
+static int i8k_fan_mult;
+
+#define I8K_HWMON_HAVE_TEMP1	(1 << 0)
+#define I8K_HWMON_HAVE_TEMP2	(1 << 1)
+#define I8K_HWMON_HAVE_TEMP3	(1 << 2)
+#define I8K_HWMON_HAVE_TEMP4	(1 << 3)
+#define I8K_HWMON_HAVE_FAN1	(1 << 4)
+#define I8K_HWMON_HAVE_FAN2	(1 << 5)
 
 MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)");
 MODULE_DESCRIPTION("Driver for accessing SMM BIOS on Dell laptops");
@@ -103,11 +111,11 @@ static const struct file_operations i8k_fops = {
 
 struct smm_regs {
 	unsigned int eax;
-	unsigned int ebx __attribute__ ((packed));
-	unsigned int ecx __attribute__ ((packed));
-	unsigned int edx __attribute__ ((packed));
-	unsigned int esi __attribute__ ((packed));
-	unsigned int edi __attribute__ ((packed));
+	unsigned int ebx __packed;
+	unsigned int ecx __packed;
+	unsigned int edx __packed;
+	unsigned int esi __packed;
+	unsigned int edi __packed;
 };
 
 static inline const char *i8k_get_dmi_data(int field)
@@ -124,6 +132,17 @@ static int i8k_smm(struct smm_regs *regs)
 {
 	int rc;
 	int eax = regs->eax;
+	cpumask_var_t old_mask;
+
+	/* SMM requires CPU 0 */
+	if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_copy(old_mask, &current->cpus_allowed);
+	set_cpus_allowed_ptr(current, cpumask_of(0));
+	if (smp_processor_id() != 0) {
+		rc = -EBUSY;
+		goto out;
+	}
 
 #if defined(CONFIG_X86_64)
 	asm volatile("pushq %%rax\n\t"
@@ -148,7 +167,7 @@ static int i8k_smm(struct smm_regs *regs)
 		"pushfq\n\t"
 		"popq %%rax\n\t"
 		"andl $1,%%eax\n"
-		:"=a"(rc)
+		: "=a"(rc)
 		:    "a"(regs)
 		:    "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory");
 #else
@@ -174,25 +193,17 @@ static int i8k_smm(struct smm_regs *regs)
 	    "lahf\n\t"
 	    "shrl $8,%%eax\n\t"
 	    "andl $1,%%eax\n"
-	    :"=a"(rc)
+	    : "=a"(rc)
 	    :    "a"(regs)
 	    :    "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory");
 #endif
 	if (rc != 0 || (regs->eax & 0xffff) == 0xffff || regs->eax == eax)
-		return -EINVAL;
+		rc = -EINVAL;
 
-	return 0;
-}
-
-/*
- * Read the bios version. Return the version as an integer corresponding
- * to the ascii value, for example "A17" is returned as 0x00413137.
- */
-static int i8k_get_bios_version(void)
-{
-	struct smm_regs regs = { .eax = I8K_SMM_BIOS_VERSION, };
-
-	return i8k_smm(&regs) ? : regs.eax;
+out:
+	set_cpus_allowed_ptr(current, old_mask);
+	free_cpumask_var(old_mask);
+	return rc;
 }
 
 /*
@@ -203,7 +214,8 @@ static int i8k_get_fn_status(void)
 	struct smm_regs regs = { .eax = I8K_SMM_FN_STATUS, };
 	int rc;
 
-	if ((rc = i8k_smm(&regs)) < 0)
+	rc = i8k_smm(&regs);
+	if (rc < 0)
 		return rc;
 
 	switch ((regs.eax >> I8K_FN_SHIFT) & I8K_FN_MASK) {
@@ -226,7 +238,8 @@ static int i8k_get_power_status(void)
 	struct smm_regs regs = { .eax = I8K_SMM_POWER_STATUS, };
 	int rc;
 
-	if ((rc = i8k_smm(&regs)) < 0)
+	rc = i8k_smm(&regs);
+	if (rc < 0)
 		return rc;
 
 	return (regs.eax & 0xff) == I8K_POWER_AC ? I8K_AC : I8K_BATTERY;
@@ -251,7 +264,7 @@ static int i8k_get_fan_speed(int fan)
 	struct smm_regs regs = { .eax = I8K_SMM_GET_SPEED, };
 
 	regs.ebx = fan & 0xff;
-	return i8k_smm(&regs) ? : (regs.eax & 0xffff) * fan_mult;
+	return i8k_smm(&regs) ? : (regs.eax & 0xffff) * i8k_fan_mult;
 }
 
 /*
@@ -277,10 +290,11 @@ static int i8k_get_temp(int sensor)
 	int temp;
 
 #ifdef I8K_TEMPERATURE_BUG
-	static int prev;
+	static int prev[4];
 #endif
 	regs.ebx = sensor & 0xff;
-	if ((rc = i8k_smm(&regs)) < 0)
+	rc = i8k_smm(&regs);
+	if (rc < 0)
 		return rc;
 
 	temp = regs.eax & 0xff;
@@ -294,10 +308,10 @@ static int i8k_get_temp(int sensor)
 	 # 1003655139 00000054 00005c52
 	 */
 	if (temp > I8K_MAX_TEMP) {
-		temp = prev;
-		prev = I8K_MAX_TEMP;
+		temp = prev[sensor];
+		prev[sensor] = I8K_MAX_TEMP;
 	} else {
-		prev = temp;
+		prev[sensor] = temp;
 	}
 #endif
 
@@ -309,7 +323,8 @@ static int i8k_get_dell_signature(int req_fn)
 	struct smm_regs regs = { .eax = req_fn, };
 	int rc;
 
-	if ((rc = i8k_smm(&regs)) < 0)
+	rc = i8k_smm(&regs);
+	if (rc < 0)
 		return rc;
 
 	return regs.eax == 1145651527 && regs.edx == 1145392204 ? 0 : -1;
@@ -328,12 +343,14 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case I8K_BIOS_VERSION:
-		val = i8k_get_bios_version();
+		val = (bios_version[0] << 16) |
+				(bios_version[1] << 8) | bios_version[2];
 		break;
 
 	case I8K_MACHINE_ID:
 		memset(buff, 0, 16);
-		strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), sizeof(buff));
+		strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+			sizeof(buff));
 		break;
 
 	case I8K_FN_STATUS:
@@ -470,12 +487,13 @@ static ssize_t i8k_hwmon_show_temp(struct device *dev,
 				   struct device_attribute *devattr,
 				   char *buf)
 {
-	int cpu_temp;
+	int index = to_sensor_dev_attr(devattr)->index;
+	int temp;
 
-	cpu_temp = i8k_get_temp(0);
-	if (cpu_temp < 0)
-		return cpu_temp;
-	return sprintf(buf, "%d\n", cpu_temp * 1000);
+	temp = i8k_get_temp(index);
+	if (temp < 0)
+		return temp;
+	return sprintf(buf, "%d\n", temp * 1000);
 }
 
 static ssize_t i8k_hwmon_show_fan(struct device *dev,
@@ -491,12 +509,44 @@ static ssize_t i8k_hwmon_show_fan(struct device *dev,
 	return sprintf(buf, "%d\n", fan_speed);
 }
 
+static ssize_t i8k_hwmon_show_pwm(struct device *dev,
+				  struct device_attribute *devattr,
+				  char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	int status;
+
+	status = i8k_get_fan_status(index);
+	if (status < 0)
+		return -EIO;
+	return sprintf(buf, "%d\n", clamp_val(status * 128, 0, 255));
+}
+
+static ssize_t i8k_hwmon_set_pwm(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	int index = to_sensor_dev_attr(attr)->index;
+	unsigned long val;
+	int err;
+
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	val = clamp_val(DIV_ROUND_CLOSEST(val, 128), 0, 2);
+
+	mutex_lock(&i8k_mutex);
+	err = i8k_set_fan(index, val);
+	mutex_unlock(&i8k_mutex);
+
+	return err < 0 ? -EIO : count;
+}
+
 static ssize_t i8k_hwmon_show_label(struct device *dev,
 				    struct device_attribute *devattr,
 				    char *buf)
 {
-	static const char *labels[4] = {
-		"i8k",
+	static const char *labels[3] = {
 		"CPU",
 		"Left Fan",
 		"Right Fan",
@@ -506,108 +556,108 @@ static ssize_t i8k_hwmon_show_label(struct device *dev,
 	return sprintf(buf, "%s\n", labels[index]);
 }
 
-static DEVICE_ATTR(temp1_input, S_IRUGO, i8k_hwmon_show_temp, NULL);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, i8k_hwmon_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, i8k_hwmon_show_temp, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, i8k_hwmon_show_temp, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, i8k_hwmon_show_temp, NULL, 3);
 static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, i8k_hwmon_show_fan, NULL,
 			  I8K_FAN_LEFT);
+static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, i8k_hwmon_show_pwm,
+			  i8k_hwmon_set_pwm, I8K_FAN_LEFT);
 static SENSOR_DEVICE_ATTR(fan2_input, S_IRUGO, i8k_hwmon_show_fan, NULL,
 			  I8K_FAN_RIGHT);
-static SENSOR_DEVICE_ATTR(name, S_IRUGO, i8k_hwmon_show_label, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, i8k_hwmon_show_label, NULL, 1);
-static SENSOR_DEVICE_ATTR(fan1_label, S_IRUGO, i8k_hwmon_show_label, NULL, 2);
-static SENSOR_DEVICE_ATTR(fan2_label, S_IRUGO, i8k_hwmon_show_label, NULL, 3);
+static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, i8k_hwmon_show_pwm,
+			  i8k_hwmon_set_pwm, I8K_FAN_RIGHT);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, i8k_hwmon_show_label, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_label, S_IRUGO, i8k_hwmon_show_label, NULL, 1);
+static SENSOR_DEVICE_ATTR(fan2_label, S_IRUGO, i8k_hwmon_show_label, NULL, 2);
+
+static struct attribute *i8k_attrs[] = {
+	&sensor_dev_attr_temp1_input.dev_attr.attr,	/* 0 */
+	&sensor_dev_attr_temp1_label.dev_attr.attr,	/* 1 */
+	&sensor_dev_attr_temp2_input.dev_attr.attr,	/* 2 */
+	&sensor_dev_attr_temp3_input.dev_attr.attr,	/* 3 */
+	&sensor_dev_attr_temp4_input.dev_attr.attr,	/* 4 */
+	&sensor_dev_attr_fan1_input.dev_attr.attr,	/* 5 */
+	&sensor_dev_attr_pwm1.dev_attr.attr,		/* 6 */
+	&sensor_dev_attr_fan1_label.dev_attr.attr,	/* 7 */
+	&sensor_dev_attr_fan2_input.dev_attr.attr,	/* 8 */
+	&sensor_dev_attr_pwm2.dev_attr.attr,		/* 9 */
+	&sensor_dev_attr_fan2_label.dev_attr.attr,	/* 10 */
+	NULL
+};
 
-static void i8k_hwmon_remove_files(struct device *dev)
+static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
+			      int index)
 {
-	device_remove_file(dev, &dev_attr_temp1_input);
-	device_remove_file(dev, &sensor_dev_attr_fan1_input.dev_attr);
-	device_remove_file(dev, &sensor_dev_attr_fan2_input.dev_attr);
-	device_remove_file(dev, &sensor_dev_attr_temp1_label.dev_attr);
-	device_remove_file(dev, &sensor_dev_attr_fan1_label.dev_attr);
-	device_remove_file(dev, &sensor_dev_attr_fan2_label.dev_attr);
-	device_remove_file(dev, &sensor_dev_attr_name.dev_attr);
+	if ((index == 0 || index == 1) &&
+	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
+		return 0;
+	if (index == 2 && !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP2))
+		return 0;
+	if (index == 3 && !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP3))
+		return 0;
+	if (index == 4 && !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP4))
+		return 0;
+	if (index >= 5 && index <= 7 &&
+	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_FAN1))
+		return 0;
+	if (index >= 8 && index <= 10 &&
+	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_FAN2))
+		return 0;
+
+	return attr->mode;
 }
 
+static const struct attribute_group i8k_group = {
+	.attrs = i8k_attrs,
+	.is_visible = i8k_is_visible,
+};
+__ATTRIBUTE_GROUPS(i8k);
+
 static int __init i8k_init_hwmon(void)
 {
 	int err;
 
-	i8k_hwmon_dev = hwmon_device_register(NULL);
-	if (IS_ERR(i8k_hwmon_dev)) {
-		err = PTR_ERR(i8k_hwmon_dev);
-		i8k_hwmon_dev = NULL;
-		printk(KERN_ERR "i8k: hwmon registration failed (%d)\n", err);
-		return err;
-	}
-
-	/* Required name attribute */
-	err = device_create_file(i8k_hwmon_dev,
-				 &sensor_dev_attr_name.dev_attr);
-	if (err)
-		goto exit_unregister;
+	i8k_hwmon_flags = 0;
 
 	/* CPU temperature attributes, if temperature reading is OK */
 	err = i8k_get_temp(0);
-	if (err < 0) {
-		dev_dbg(i8k_hwmon_dev,
-			"Not creating temperature attributes (%d)\n", err);
-	} else {
-		err = device_create_file(i8k_hwmon_dev, &dev_attr_temp1_input);
-		if (err)
-			goto exit_remove_files;
-		err = device_create_file(i8k_hwmon_dev,
-					 &sensor_dev_attr_temp1_label.dev_attr);
-		if (err)
-			goto exit_remove_files;
-	}
+	if (err >= 0)
+		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP1;
+	/* check for additional temperature sensors */
+	err = i8k_get_temp(1);
+	if (err >= 0)
+		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP2;
+	err = i8k_get_temp(2);
+	if (err >= 0)
+		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP3;
+	err = i8k_get_temp(3);
+	if (err >= 0)
+		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4;
 
 	/* Left fan attributes, if left fan is present */
 	err = i8k_get_fan_status(I8K_FAN_LEFT);
-	if (err < 0) {
-		dev_dbg(i8k_hwmon_dev,
-			"Not creating %s fan attributes (%d)\n", "left", err);
-	} else {
-		err = device_create_file(i8k_hwmon_dev,
-					 &sensor_dev_attr_fan1_input.dev_attr);
-		if (err)
-			goto exit_remove_files;
-		err = device_create_file(i8k_hwmon_dev,
-					 &sensor_dev_attr_fan1_label.dev_attr);
-		if (err)
-			goto exit_remove_files;
-	}
+	if (err >= 0)
+		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1;
 
 	/* Right fan attributes, if right fan is present */
 	err = i8k_get_fan_status(I8K_FAN_RIGHT);
-	if (err < 0) {
-		dev_dbg(i8k_hwmon_dev,
-			"Not creating %s fan attributes (%d)\n", "right", err);
-	} else {
-		err = device_create_file(i8k_hwmon_dev,
-					 &sensor_dev_attr_fan2_input.dev_attr);
-		if (err)
-			goto exit_remove_files;
-		err = device_create_file(i8k_hwmon_dev,
-					 &sensor_dev_attr_fan2_label.dev_attr);
-		if (err)
-			goto exit_remove_files;
-	}
+	if (err >= 0)
+		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
 
+	i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "i8k", NULL,
+							  i8k_groups);
+	if (IS_ERR(i8k_hwmon_dev)) {
+		err = PTR_ERR(i8k_hwmon_dev);
+		i8k_hwmon_dev = NULL;
+		pr_err("hwmon registration failed (%d)\n", err);
+		return err;
+	}
 	return 0;
-
- exit_remove_files:
-	i8k_hwmon_remove_files(i8k_hwmon_dev);
- exit_unregister:
-	hwmon_device_unregister(i8k_hwmon_dev);
-	return err;
 }
 
-static void __exit i8k_exit_hwmon(void)
-{
-	i8k_hwmon_remove_files(i8k_hwmon_dev);
-	hwmon_device_unregister(i8k_hwmon_dev);
-}
-
-static struct dmi_system_id __initdata i8k_dmi_table[] = {
+static struct dmi_system_id i8k_dmi_table[] __initdata = {
 	{
 		.ident = "Dell Inspiron",
 		.matches = {
@@ -671,7 +721,23 @@ static struct dmi_system_id __initdata i8k_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "XPS L421X"),
 		},
 	},
-        { }
+	{
+		.ident = "Dell Studio",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Studio"),
+		},
+		.driver_data = (void *)1,	/* fan multiplier override */
+	},
+	{
+		.ident = "Dell XPS M140",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MXC051"),
+		},
+		.driver_data = (void *)1,	/* fan multiplier override */
+	},
+	{ }
 };
 
 /*
@@ -679,8 +745,7 @@ static struct dmi_system_id __initdata i8k_dmi_table[] = {
  */
 static int __init i8k_probe(void)
 {
-	char buff[4];
-	int version;
+	const struct dmi_system_id *id;
 
 	/*
 	 * Get DMI information
@@ -689,49 +754,30 @@ static int __init i8k_probe(void)
 		if (!ignore_dmi && !force)
 			return -ENODEV;
 
-		printk(KERN_INFO "i8k: not running on a supported Dell system.\n");
-		printk(KERN_INFO "i8k: vendor=%s, model=%s, version=%s\n",
+		pr_info("not running on a supported Dell system.\n");
+		pr_info("vendor=%s, model=%s, version=%s\n",
 			i8k_get_dmi_data(DMI_SYS_VENDOR),
 			i8k_get_dmi_data(DMI_PRODUCT_NAME),
 			i8k_get_dmi_data(DMI_BIOS_VERSION));
 	}
 
-	strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version));
+	strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
+		sizeof(bios_version));
 
 	/*
 	 * Get SMM Dell signature
 	 */
 	if (i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG1) &&
 	    i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG2)) {
-		printk(KERN_ERR "i8k: unable to get SMM Dell signature\n");
+		pr_err("unable to get SMM Dell signature\n");
 		if (!force)
 			return -ENODEV;
 	}
 
-	/*
-	 * Get SMM BIOS version.
-	 */
-	version = i8k_get_bios_version();
-	if (version <= 0) {
-		printk(KERN_WARNING "i8k: unable to get SMM BIOS version\n");
-	} else {
-		buff[0] = (version >> 16) & 0xff;
-		buff[1] = (version >> 8) & 0xff;
-		buff[2] = (version) & 0xff;
-		buff[3] = '\0';
-		/*
-		 * If DMI BIOS version is unknown use SMM BIOS version.
-		 */
-		if (!dmi_get_system_info(DMI_BIOS_VERSION))
-			strlcpy(bios_version, buff, sizeof(bios_version));
-
-		/*
-		 * Check if the two versions match.
-		 */
-		if (strncmp(buff, bios_version, sizeof(bios_version)) != 0)
-			printk(KERN_WARNING "i8k: BIOS version mismatch: %s != %s\n",
-				buff, bios_version);
-	}
+	i8k_fan_mult = fan_mult;
+	id = dmi_first_match(i8k_dmi_table);
+	if (id && fan_mult == I8K_FAN_MULT && id->driver_data)
+		i8k_fan_mult = (unsigned long)id->driver_data;
 
 	return 0;
 }
@@ -754,10 +800,6 @@ static int __init i8k_init(void)
 	if (err)
 		goto exit_remove_proc;
 
-	printk(KERN_INFO
-	       "Dell laptop SMM driver v%s Massimo Dal Zotto (dz@debian.org)\n",
-	       I8K_VERSION);
-
 	return 0;
 
  exit_remove_proc:
@@ -767,7 +809,7 @@ static int __init i8k_init(void)
 
 static void __exit i8k_exit(void)
 {
-	i8k_exit_hwmon();
+	hwmon_device_unregister(i8k_hwmon_dev);
 	remove_proc_entry("i8k", NULL);
 }
 
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 0913d79424d3..c4094c4e22c1 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -587,6 +587,8 @@ static int lp_do_ioctl(unsigned int minor, unsigned int cmd,
 		return -ENODEV;
 	switch ( cmd ) {
 		case LPTIME:
+			if (arg > UINT_MAX / HZ)
+				return -EINVAL;
 			LP_TIME(minor) = arg * HZ/100;
 			break;
 		case LPCHAR:
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index 1fd00dc06897..76c490fa0511 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -168,7 +168,10 @@ static irqreturn_t button_handler (int irq, void *dev_id)
 static int button_read (struct file *filp, char __user *buffer,
 			size_t count, loff_t *ppos)
 {
-	interruptible_sleep_on (&button_wait_queue);
+	DEFINE_WAIT(wait);
+	prepare_to_wait(&button_wait_queue, &wait, TASK_INTERRUPTIBLE);
+	schedule();
+	finish_wait(&button_wait_queue, &wait);
 	return (copy_to_user (buffer, &button_output_buffer, bcount))
 		 ? -EFAULT : bcount;
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 02d534da22dd..16d7b4ac94be 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -828,6 +828,12 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
 	int ret = 0;
 
 	memcpy(&new_policy, policy, sizeof(*policy));
+
+	/* Use the default policy if it's valid. */
+	if (cpufreq_driver->setpolicy)
+		cpufreq_parse_governor(policy->governor->name,
+					&new_policy.policy, NULL);
+
 	/* assure that the starting sequence is run in cpufreq_set_policy */
 	policy->governor = NULL;
 
@@ -845,8 +851,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
 
 #ifdef CONFIG_HOTPLUG_CPU
 static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
-				  unsigned int cpu, struct device *dev,
-				  bool frozen)
+				  unsigned int cpu, struct device *dev)
 {
 	int ret = 0;
 	unsigned long flags;
@@ -877,11 +882,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 		}
 	}
 
-	/* Don't touch sysfs links during light-weight init */
-	if (!frozen)
-		ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
-
-	return ret;
+	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
 }
 #endif
 
@@ -926,6 +927,27 @@ err_free_policy:
 	return NULL;
 }
 
+static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
+{
+	struct kobject *kobj;
+	struct completion *cmp;
+
+	down_read(&policy->rwsem);
+	kobj = &policy->kobj;
+	cmp = &policy->kobj_unregister;
+	up_read(&policy->rwsem);
+	kobject_put(kobj);
+
+	/*
+	 * We need to make sure that the underlying kobj is
+	 * actually not referenced anymore by anybody before we
+	 * proceed with unloading.
+	 */
+	pr_debug("waiting for dropping of refcount\n");
+	wait_for_completion(cmp);
+	pr_debug("wait complete\n");
+}
+
 static void cpufreq_policy_free(struct cpufreq_policy *policy)
 {
 	free_cpumask_var(policy->related_cpus);
@@ -986,7 +1008,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
 	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
 		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
 			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev, frozen);
+			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
 			up_read(&cpufreq_rwsem);
 			return ret;
 		}
@@ -1096,7 +1118,10 @@ err_get_freq:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
 err_set_policy_cpu:
+	if (frozen)
+		cpufreq_policy_put_kobj(policy);
 	cpufreq_policy_free(policy);
+
 nomem_out:
 	up_read(&cpufreq_rwsem);
 
@@ -1118,7 +1143,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 }
 
 static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
-					   unsigned int old_cpu, bool frozen)
+					   unsigned int old_cpu)
 {
 	struct device *cpu_dev;
 	int ret;
@@ -1126,10 +1151,6 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
 	/* first sibling now owns the new sysfs dir */
 	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
 
-	/* Don't touch sysfs files during light-weight tear-down */
-	if (frozen)
-		return cpu_dev->id;
-
 	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
 	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
 	if (ret) {
@@ -1196,7 +1217,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 		if (!frozen)
 			sysfs_remove_link(&dev->kobj, "cpufreq");
 	} else if (cpus > 1) {
-		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen);
+		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
 		if (new_cpu >= 0) {
 			update_policy_cpu(policy, new_cpu);
 
@@ -1218,8 +1239,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 	int ret;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
-	struct kobject *kobj;
-	struct completion *cmp;
 
 	read_lock_irqsave(&cpufreq_driver_lock, flags);
 	policy = per_cpu(cpufreq_cpu_data, cpu);
@@ -1249,22 +1268,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 			}
 		}
 
-		if (!frozen) {
-			down_read(&policy->rwsem);
-			kobj = &policy->kobj;
-			cmp = &policy->kobj_unregister;
-			up_read(&policy->rwsem);
-			kobject_put(kobj);
-
-			/*
-			 * We need to make sure that the underlying kobj is
-			 * actually not referenced anymore by anybody before we
-			 * proceed with unloading.
-			 */
-			pr_debug("waiting for dropping of refcount\n");
-			wait_for_completion(cmp);
-			pr_debug("wait complete\n");
-		}
+		if (!frozen)
+			cpufreq_policy_put_kobj(policy);
 
 		/*
 		 * Perform the ->exit() even during light-weight tear-down,
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 299fad6b5867..5373dc5b6011 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_FIRMWARE_MEMMAP)	+= memmap.o
 
 obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
 obj-$(CONFIG_EFI)		+= efi/
+obj-$(CONFIG_UEFI_CPER)		+= efi/
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 3150aa4874e8..6aecbc86ec94 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -36,7 +36,7 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE
 	  backend for pstore by default. This setting can be overridden
 	  using the efivars module's pstore_disable parameter.
 
-config UEFI_CPER
-	def_bool n
-
 endmenu
+
+config UEFI_CPER
+	bool
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 9ba156d3c775..6c2a41ec21ba 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for linux kernel
 #
-obj-y					+= efi.o vars.o
+obj-$(CONFIG_EFI)			+= efi.o vars.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
 obj-$(CONFIG_UEFI_CPER)			+= cper.o
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 621c7c67a643..76d3d1ab73c6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2343,15 +2343,24 @@ static void i915_gem_free_request(struct drm_i915_gem_request *request)
 	kfree(request);
 }
 
-static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
-				      struct intel_ring_buffer *ring)
+static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
+				       struct intel_ring_buffer *ring)
 {
-	u32 completed_seqno;
-	u32 acthd;
+	u32 completed_seqno = ring->get_seqno(ring, false);
+	u32 acthd = intel_ring_get_active_head(ring);
+	struct drm_i915_gem_request *request;
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		if (i915_seqno_passed(completed_seqno, request->seqno))
+			continue;
 
-	acthd = intel_ring_get_active_head(ring);
-	completed_seqno = ring->get_seqno(ring, false);
+		i915_set_reset_status(ring, request, acthd);
+	}
+}
 
+static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
+					struct intel_ring_buffer *ring)
+{
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
 
@@ -2359,9 +2368,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 					   struct drm_i915_gem_request,
 					   list);
 
-		if (request->seqno > completed_seqno)
-			i915_set_reset_status(ring, request, acthd);
-
 		i915_gem_free_request(request);
 	}
 
@@ -2403,8 +2409,16 @@ void i915_gem_reset(struct drm_device *dev)
 	struct intel_ring_buffer *ring;
 	int i;
 
+	/*
+	 * Before we free the objects from the requests, we need to inspect
+	 * them for finding the guilty party. As the requests only borrow
+	 * their reference to the objects, the inspection must be done first.
+	 */
+	for_each_ring(ring, dev_priv, i)
+		i915_gem_reset_ring_status(dev_priv, ring);
+
 	for_each_ring(ring, dev_priv, i)
-		i915_gem_reset_ring_lists(dev_priv, ring);
+		i915_gem_reset_ring_cleanup(dev_priv, ring);
 
 	i915_gem_cleanup_ringbuffer(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b7e787fb4649..a3ba9a8cd687 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -93,7 +93,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
 {
 	struct drm_i915_gem_object *obj;
 	struct list_head objects;
-	int i, ret = 0;
+	int i, ret;
 
 	INIT_LIST_HEAD(&objects);
 	spin_lock(&file->table_lock);
@@ -106,7 +106,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
 			DRM_DEBUG("Invalid object handle %d at index %d\n",
 				   exec[i].handle, i);
 			ret = -ENOENT;
-			goto out;
+			goto err;
 		}
 
 		if (!list_empty(&obj->obj_exec_link)) {
@@ -114,7 +114,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
 			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
 				   obj, exec[i].handle, i);
 			ret = -EINVAL;
-			goto out;
+			goto err;
 		}
 
 		drm_gem_object_reference(&obj->base);
@@ -123,9 +123,13 @@ eb_lookup_vmas(struct eb_vmas *eb,
 	spin_unlock(&file->table_lock);
 
 	i = 0;
-	list_for_each_entry(obj, &objects, obj_exec_link) {
+	while (!list_empty(&objects)) {
 		struct i915_vma *vma;
 
+		obj = list_first_entry(&objects,
+				       struct drm_i915_gem_object,
+				       obj_exec_link);
+
 		/*
 		 * NOTE: We can leak any vmas created here when something fails
 		 * later on. But that's no issue since vma_unbind can deal with
@@ -138,10 +142,12 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		if (IS_ERR(vma)) {
 			DRM_DEBUG("Failed to lookup VMA\n");
 			ret = PTR_ERR(vma);
-			goto out;
+			goto err;
 		}
 
+		/* Transfer ownership from the objects list to the vmas list. */
 		list_add_tail(&vma->exec_list, &eb->vmas);
+		list_del_init(&obj->obj_exec_link);
 
 		vma->exec_entry = &exec[i];
 		if (eb->and < 0) {
@@ -155,16 +161,22 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		++i;
 	}
 
+	return 0;
+
 
-out:
+err:
 	while (!list_empty(&objects)) {
 		obj = list_first_entry(&objects,
 				       struct drm_i915_gem_object,
 				       obj_exec_link);
 		list_del_init(&obj->obj_exec_link);
-		if (ret)
-			drm_gem_object_unreference(&obj->base);
+		drm_gem_object_unreference(&obj->base);
 	}
+	/*
+	 * Objects already transferred to the vmas list will be unreferenced by
+	 * eb_destroy.
+	 */
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 8b8bde7dce53..54e82a80cf50 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6303,7 +6303,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
 	uint32_t val;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
-		WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n",
+		WARN(crtc->active, "CRTC for pipe %c enabled\n",
 		     pipe_name(crtc->pipe));
 
 	WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n");
@@ -11126,14 +11126,15 @@ void intel_connector_attach_encoder(struct intel_connector *connector,
 int intel_modeset_vga_set_state(struct drm_device *dev, bool state)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned reg = INTEL_INFO(dev)->gen >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL;
 	u16 gmch_ctrl;
 
-	pci_read_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, &gmch_ctrl);
+	pci_read_config_word(dev_priv->bridge_dev, reg, &gmch_ctrl);
 	if (state)
 		gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE;
 	else
 		gmch_ctrl |= INTEL_GMCH_VGA_DISABLE;
-	pci_write_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, gmch_ctrl);
+	pci_write_config_word(dev_priv->bridge_dev, reg, gmch_ctrl);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3657ab43c8fd..26c29c173221 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5688,6 +5688,8 @@ static void __intel_set_power_well(struct drm_device *dev, bool enable)
 	unsigned long irqflags;
 	uint32_t tmp;
 
+	WARN_ON(dev_priv->pc8.enabled);
+
 	tmp = I915_READ(HSW_PWR_WELL_DRIVER);
 	is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED;
 	enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST;
@@ -5747,16 +5749,24 @@ static void __intel_set_power_well(struct drm_device *dev, bool enable)
 static void __intel_power_well_get(struct drm_device *dev,
 				   struct i915_power_well *power_well)
 {
-	if (!power_well->count++)
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!power_well->count++) {
+		hsw_disable_package_c8(dev_priv);
 		__intel_set_power_well(dev, true);
+	}
 }
 
 static void __intel_power_well_put(struct drm_device *dev,
 				   struct i915_power_well *power_well)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	WARN_ON(!power_well->count);
-	if (!--power_well->count && i915_disable_power_well)
+	if (!--power_well->count && i915_disable_power_well) {
 		__intel_set_power_well(dev, false);
+		hsw_enable_package_c8(dev_priv);
+	}
 }
 
 void intel_display_power_get(struct drm_device *dev,
diff --git a/drivers/gpu/drm/qxl/Kconfig b/drivers/gpu/drm/qxl/Kconfig
index 037d324bf58f..66ac0ff95f5a 100644
--- a/drivers/gpu/drm/qxl/Kconfig
+++ b/drivers/gpu/drm/qxl/Kconfig
@@ -8,5 +8,6 @@ config DRM_QXL
         select DRM_KMS_HELPER
 	select DRM_KMS_FB_HELPER
         select DRM_TTM
+	select CRC32
 	help
 		QXL virtual GPU for Spice virtualization desktop integration. Do not enable this driver unless your distro ships a corresponding X.org QXL driver that can handle kernel modesetting.
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 5e827c29d194..d70aafb83307 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -24,7 +24,7 @@
  */
 
 
-#include "linux/crc32.h"
+#include <linux/crc32.h>
 
 #include "qxl_drv.h"
 #include "qxl_object.h"
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index de86493cbc44..713a5d359901 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -174,7 +174,7 @@ void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
-	if (sad_count < 0) {
+	if (sad_count <= 0) {
 		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
 		return;
 	}
@@ -235,7 +235,7 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_sad(radeon_connector->edid, &sads);
-	if (sad_count < 0) {
+	if (sad_count <= 0) {
 		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
 		return;
 	}
@@ -308,7 +308,9 @@ int dce6_audio_init(struct radeon_device *rdev)
 	rdev->audio.enabled = true;
 
 	if (ASIC_IS_DCE8(rdev))
-		rdev->audio.num_pins = 7;
+		rdev->audio.num_pins = 6;
+	else if (ASIC_IS_DCE61(rdev))
+		rdev->audio.num_pins = 4;
 	else
 		rdev->audio.num_pins = 6;
 
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index aa695c4feb3d..0c6d5cef4cf1 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -118,7 +118,7 @@ static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
-	if (sad_count < 0) {
+	if (sad_count <= 0) {
 		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
 		return;
 	}
@@ -173,7 +173,7 @@ static void evergreen_hdmi_write_sad_regs(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_sad(radeon_connector->edid, &sads);
-	if (sad_count < 0) {
+	if (sad_count <= 0) {
 		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
 		return;
 	}
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 11aab2ab54ce..f59a9e9fccf8 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -895,6 +895,10 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 		    (rdev->pdev->device == 0x999C)) {
 			rdev->config.cayman.max_simds_per_se = 6;
 			rdev->config.cayman.max_backends_per_se = 2;
+			rdev->config.cayman.max_hw_contexts = 8;
+			rdev->config.cayman.sx_max_export_size = 256;
+			rdev->config.cayman.sx_max_export_pos_size = 64;
+			rdev->config.cayman.sx_max_export_smx_size = 192;
 		} else if ((rdev->pdev->device == 0x9903) ||
 			   (rdev->pdev->device == 0x9904) ||
 			   (rdev->pdev->device == 0x990A) ||
@@ -905,6 +909,10 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 			   (rdev->pdev->device == 0x999D)) {
 			rdev->config.cayman.max_simds_per_se = 4;
 			rdev->config.cayman.max_backends_per_se = 2;
+			rdev->config.cayman.max_hw_contexts = 8;
+			rdev->config.cayman.sx_max_export_size = 256;
+			rdev->config.cayman.sx_max_export_pos_size = 64;
+			rdev->config.cayman.sx_max_export_smx_size = 192;
 		} else if ((rdev->pdev->device == 0x9919) ||
 			   (rdev->pdev->device == 0x9990) ||
 			   (rdev->pdev->device == 0x9991) ||
@@ -915,9 +923,17 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 			   (rdev->pdev->device == 0x99A0)) {
 			rdev->config.cayman.max_simds_per_se = 3;
 			rdev->config.cayman.max_backends_per_se = 1;
+			rdev->config.cayman.max_hw_contexts = 4;
+			rdev->config.cayman.sx_max_export_size = 128;
+			rdev->config.cayman.sx_max_export_pos_size = 32;
+			rdev->config.cayman.sx_max_export_smx_size = 96;
 		} else {
 			rdev->config.cayman.max_simds_per_se = 2;
 			rdev->config.cayman.max_backends_per_se = 1;
+			rdev->config.cayman.max_hw_contexts = 4;
+			rdev->config.cayman.sx_max_export_size = 128;
+			rdev->config.cayman.sx_max_export_pos_size = 32;
+			rdev->config.cayman.sx_max_export_smx_size = 96;
 		}
 		rdev->config.cayman.max_texture_channel_caches = 2;
 		rdev->config.cayman.max_gprs = 256;
@@ -925,10 +941,6 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 		rdev->config.cayman.max_gs_threads = 32;
 		rdev->config.cayman.max_stack_entries = 512;
 		rdev->config.cayman.sx_num_of_sets = 8;
-		rdev->config.cayman.sx_max_export_size = 256;
-		rdev->config.cayman.sx_max_export_pos_size = 64;
-		rdev->config.cayman.sx_max_export_smx_size = 192;
-		rdev->config.cayman.max_hw_contexts = 8;
 		rdev->config.cayman.sq_num_cf_insts = 2;
 
 		rdev->config.cayman.sc_prim_fifo_size = 0x40;
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index 913b025ae9b3..374499db20c7 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -2328,6 +2328,12 @@ void rv770_get_engine_memory_ss(struct radeon_device *rdev)
 	pi->mclk_ss = radeon_atombios_get_asic_ss_info(rdev, &ss,
 						       ASIC_INTERNAL_MEMORY_SS, 0);
 
+	/* disable ss, causes hangs on some cayman boards */
+	if (rdev->family == CHIP_CAYMAN) {
+		pi->sclk_ss = false;
+		pi->mclk_ss = false;
+	}
+
 	if (pi->sclk_ss || pi->mclk_ss)
 		pi->dynamic_ss = true;
 	else
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 15b86a94949d..406152152315 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -353,7 +353,8 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	 * Don't move nonexistent data. Clear destination instead.
 	 */
 	if (old_iomap == NULL &&
-	    (ttm == NULL || ttm->state == tt_unpopulated)) {
+	    (ttm == NULL || (ttm->state == tt_unpopulated &&
+			     !(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)))) {
 		memset_io(new_iomap, 0, new_mem->num_pages*PAGE_SIZE);
 		goto out2;
 	}
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index f0c5e07c25ec..bcb49502c3bf 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -301,7 +301,7 @@ err:
 	return -ENOMEM;
 }
 
-void hv_synic_free_cpu(int cpu)
+static void hv_synic_free_cpu(int cpu)
 {
 	kfree(hv_context.event_dpc[cpu]);
 	if (hv_context.synic_event_page[cpu])
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 92d1206482a6..f80b700f821c 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -377,6 +377,9 @@ static int intel_idle(struct cpuidle_device *dev,
 
 	if (!current_set_polling_and_test()) {
 
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index c47c2034ca71..0717940ec3b5 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -181,9 +181,16 @@ static void add_ref(struct iw_cm_id *cm_id)
 static void rem_ref(struct iw_cm_id *cm_id)
 {
 	struct iwcm_id_private *cm_id_priv;
+	int cb_destroy;
+
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
-	if (iwcm_deref_id(cm_id_priv) &&
-	    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+
+	/*
+	 * Test bit before deref in case the cm_id gets freed on another
+	 * thread.
+	 */
+	cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+	if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
 		BUG_ON(!list_empty(&cm_id_priv->work_list));
 		free_cm_id(cm_id_priv);
 	}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index bdc842e9faef..a283274a5a09 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -49,12 +49,20 @@
 
 #define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
 	do {								\
-		(udata)->inbuf  = (void __user *) (ibuf);		\
+		(udata)->inbuf  = (const void __user *) (ibuf);		\
 		(udata)->outbuf = (void __user *) (obuf);		\
 		(udata)->inlen  = (ilen);				\
 		(udata)->outlen = (olen);				\
 	} while (0)
 
+#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen)			\
+	do {									\
+		(udata)->inbuf  = (ilen) ? (const void __user *) (ibuf) : NULL;	\
+		(udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL;	\
+		(udata)->inlen  = (ilen);					\
+		(udata)->outlen = (olen);					\
+	} while (0)
+
 /*
  * Our lifetime rules for these structs are the following:
  *
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 65f6e7dc380c..f1cc83855af6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2593,6 +2593,9 @@ out_put:
 static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 				union ib_flow_spec *ib_spec)
 {
+	if (kern_spec->reserved)
+		return -EINVAL;
+
 	ib_spec->type = kern_spec->type;
 
 	switch (ib_spec->type) {
@@ -2646,6 +2649,9 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	void *ib_spec;
 	int i;
 
+	if (ucore->inlen < sizeof(cmd))
+		return -EINVAL;
+
 	if (ucore->outlen < sizeof(resp))
 		return -ENOSPC;
 
@@ -2671,6 +2677,10 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
 		return -EINVAL;
 
+	if (cmd.flow_attr.reserved[0] ||
+	    cmd.flow_attr.reserved[1])
+		return -EINVAL;
+
 	if (cmd.flow_attr.num_of_specs) {
 		kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
 					 GFP_KERNEL);
@@ -2731,6 +2741,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
 		pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
 			i, cmd.flow_attr.size);
+		err = -EINVAL;
 		goto err_free;
 	}
 	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
@@ -2791,10 +2802,16 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 	struct ib_uobject		*uobj;
 	int				ret;
 
+	if (ucore->inlen < sizeof(cmd))
+		return -EINVAL;
+
 	ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
 	if (ret)
 		return ret;
 
+	if (cmd.comp_mask)
+		return -EINVAL;
+
 	uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
 			      file->ucontext);
 	if (!uobj)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 34386943ebcf..08219fb3338b 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -668,25 +668,30 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
 			return -EINVAL;
 
+		if (ex_hdr.cmd_hdr_reserved)
+			return -EINVAL;
+
 		if (ex_hdr.response) {
 			if (!hdr.out_words && !ex_hdr.provider_out_words)
 				return -EINVAL;
+
+			if (!access_ok(VERIFY_WRITE,
+				       (void __user *) (unsigned long) ex_hdr.response,
+				       (hdr.out_words + ex_hdr.provider_out_words) * 8))
+				return -EFAULT;
 		} else {
 			if (hdr.out_words || ex_hdr.provider_out_words)
 				return -EINVAL;
 		}
 
-		INIT_UDATA(&ucore,
-			   (hdr.in_words) ? buf : 0,
-			   (unsigned long)ex_hdr.response,
-			   hdr.in_words * 8,
-			   hdr.out_words * 8);
-
-		INIT_UDATA(&uhw,
-			   (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
-			   (ex_hdr.provider_out_words) ? (unsigned long)ex_hdr.response + ucore.outlen : 0,
-			   ex_hdr.provider_in_words * 8,
-			   ex_hdr.provider_out_words * 8);
+		INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
+				       hdr.in_words * 8, hdr.out_words * 8);
+
+		INIT_UDATA_BUF_OR_NULL(&uhw,
+				       buf + ucore.inlen,
+				       (unsigned long) ex_hdr.response + ucore.outlen,
+				       ex_hdr.provider_in_words * 8,
+				       ex_hdr.provider_out_words * 8);
 
 		err = uverbs_ex_cmd_table[command](file,
 						   &ucore,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 4cb8eb24497c..84e45006451c 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -173,7 +173,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
 	return ret;
 }
 
-int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
+static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
 {
 	u32 remain = len;
 	u32 dmalen;
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 2b46bf1d7e40..4c9852d92b0a 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -421,9 +421,11 @@ out:
 
 	if (watermark <= WATERMARK_METADATA) {
 		SET_GC_MARK(b, GC_MARK_METADATA);
+		SET_GC_MOVE(b, 0);
 		b->prio = BTREE_PRIO;
 	} else {
 		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
+		SET_GC_MOVE(b, 0);
 		b->prio = INITIAL_PRIO;
 	}
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 4beb55a0ff30..754f43177483 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -197,7 +197,7 @@ struct bucket {
 	uint8_t		disk_gen;
 	uint8_t		last_gc; /* Most out of date gen in the btree */
 	uint8_t		gc_gen;
-	uint16_t	gc_mark;
+	uint16_t	gc_mark; /* Bitfield used by GC. See below for fields */
 };
 
 /*
@@ -209,7 +209,8 @@ BITMASK(GC_MARK,	 struct bucket, gc_mark, 0, 2);
 #define GC_MARK_RECLAIMABLE	0
 #define GC_MARK_DIRTY		1
 #define GC_MARK_METADATA	2
-BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14);
+BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
+BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
 
 #include "journal.h"
 #include "stats.h"
@@ -372,14 +373,14 @@ struct cached_dev {
 	unsigned char		writeback_percent;
 	unsigned		writeback_delay;
 
-	int			writeback_rate_change;
-	int64_t			writeback_rate_derivative;
 	uint64_t		writeback_rate_target;
+	int64_t			writeback_rate_proportional;
+	int64_t			writeback_rate_derivative;
+	int64_t			writeback_rate_change;
 
 	unsigned		writeback_rate_update_seconds;
 	unsigned		writeback_rate_d_term;
 	unsigned		writeback_rate_p_term_inverse;
-	unsigned		writeback_rate_d_smooth;
 };
 
 enum alloc_watermarks {
@@ -445,7 +446,6 @@ struct cache {
 	 * call prio_write() to keep gens from wrapping.
 	 */
 	uint8_t			need_save_prio;
-	unsigned		gc_move_threshold;
 
 	/*
 	 * If nonzero, we know we aren't going to find any buckets to invalidate
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 5e2765aadce1..31bb53fcc67a 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1561,6 +1561,28 @@ size_t bch_btree_gc_finish(struct cache_set *c)
 		SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i),
 			    GC_MARK_METADATA);
 
+	/* don't reclaim buckets to which writeback keys point */
+	rcu_read_lock();
+	for (i = 0; i < c->nr_uuids; i++) {
+		struct bcache_device *d = c->devices[i];
+		struct cached_dev *dc;
+		struct keybuf_key *w, *n;
+		unsigned j;
+
+		if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
+			continue;
+		dc = container_of(d, struct cached_dev, disk);
+
+		spin_lock(&dc->writeback_keys.lock);
+		rbtree_postorder_for_each_entry_safe(w, n,
+					&dc->writeback_keys.keys, node)
+			for (j = 0; j < KEY_PTRS(&w->key); j++)
+				SET_GC_MARK(PTR_BUCKET(c, &w->key, j),
+					    GC_MARK_DIRTY);
+		spin_unlock(&dc->writeback_keys.lock);
+	}
+	rcu_read_unlock();
+
 	for_each_cache(ca, c, i) {
 		uint64_t *i;
 
@@ -1817,7 +1839,8 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
 			if (KEY_START(k) > KEY_START(insert) + sectors_found)
 				goto check_failed;
 
-			if (KEY_PTRS(replace_key) != KEY_PTRS(k))
+			if (KEY_PTRS(k) != KEY_PTRS(replace_key) ||
+			    KEY_DIRTY(k) != KEY_DIRTY(replace_key))
 				goto check_failed;
 
 			/* skip past gen */
@@ -2217,7 +2240,7 @@ struct btree_insert_op {
 	struct bkey	*replace_key;
 };
 
-int btree_insert_fn(struct btree_op *b_op, struct btree *b)
+static int btree_insert_fn(struct btree_op *b_op, struct btree *b)
 {
 	struct btree_insert_op *op = container_of(b_op,
 					struct btree_insert_op, op);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 7c1275e66025..f2f0998c4a91 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -25,10 +25,9 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
 	unsigned i;
 
 	for (i = 0; i < KEY_PTRS(k); i++) {
-		struct cache *ca = PTR_CACHE(c, k, i);
 		struct bucket *g = PTR_BUCKET(c, k, i);
 
-		if (GC_SECTORS_USED(g) < ca->gc_move_threshold)
+		if (GC_MOVE(g))
 			return true;
 	}
 
@@ -65,11 +64,16 @@ static void write_moving_finish(struct closure *cl)
 
 static void read_moving_endio(struct bio *bio, int error)
 {
+	struct bbio *b = container_of(bio, struct bbio, bio);
 	struct moving_io *io = container_of(bio->bi_private,
 					    struct moving_io, cl);
 
 	if (error)
 		io->op.error = error;
+	else if (!KEY_DIRTY(&b->key) &&
+		 ptr_stale(io->op.c, &b->key, 0)) {
+		io->op.error = -EINTR;
+	}
 
 	bch_bbio_endio(io->op.c, bio, error, "reading data to move");
 }
@@ -141,6 +145,11 @@ static void read_moving(struct cache_set *c)
 		if (!w)
 			break;
 
+		if (ptr_stale(c, &w->key, 0)) {
+			bch_keybuf_del(&c->moving_gc_keys, w);
+			continue;
+		}
+
 		io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec)
 			     * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
 			     GFP_KERNEL);
@@ -184,7 +193,8 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r)
 
 static unsigned bucket_heap_top(struct cache *ca)
 {
-	return GC_SECTORS_USED(heap_peek(&ca->heap));
+	struct bucket *b;
+	return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
 }
 
 void bch_moving_gc(struct cache_set *c)
@@ -226,9 +236,8 @@ void bch_moving_gc(struct cache_set *c)
 			sectors_to_move -= GC_SECTORS_USED(b);
 		}
 
-		ca->gc_move_threshold = bucket_heap_top(ca);
-
-		pr_debug("threshold %u", ca->gc_move_threshold);
+		while (heap_pop(&ca->heap, b, bucket_cmp))
+			SET_GC_MOVE(b, 1);
 	}
 
 	mutex_unlock(&c->bucket_lock);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index dec15cd2d797..c57bfa071a57 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1676,7 +1676,7 @@ err:
 static bool can_attach_cache(struct cache *ca, struct cache_set *c)
 {
 	return ca->sb.block_size	== c->sb.block_size &&
-		ca->sb.bucket_size	== c->sb.block_size &&
+		ca->sb.bucket_size	== c->sb.bucket_size &&
 		ca->sb.nr_in_set	== c->sb.nr_in_set;
 }
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 80d4c2bee18a..a1f85612f0b3 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -83,7 +83,6 @@ rw_attribute(writeback_rate);
 rw_attribute(writeback_rate_update_seconds);
 rw_attribute(writeback_rate_d_term);
 rw_attribute(writeback_rate_p_term_inverse);
-rw_attribute(writeback_rate_d_smooth);
 read_attribute(writeback_rate_debug);
 
 read_attribute(stripe_size);
@@ -129,31 +128,41 @@ SHOW(__bch_cached_dev)
 	var_printf(writeback_running,	"%i");
 	var_print(writeback_delay);
 	var_print(writeback_percent);
-	sysfs_print(writeback_rate,	dc->writeback_rate.rate);
+	sysfs_hprint(writeback_rate,	dc->writeback_rate.rate << 9);
 
 	var_print(writeback_rate_update_seconds);
 	var_print(writeback_rate_d_term);
 	var_print(writeback_rate_p_term_inverse);
-	var_print(writeback_rate_d_smooth);
 
 	if (attr == &sysfs_writeback_rate_debug) {
+		char rate[20];
 		char dirty[20];
-		char derivative[20];
 		char target[20];
-		bch_hprint(dirty,
-			   bcache_dev_sectors_dirty(&dc->disk) << 9);
-		bch_hprint(derivative,	dc->writeback_rate_derivative << 9);
+		char proportional[20];
+		char derivative[20];
+		char change[20];
+		s64 next_io;
+
+		bch_hprint(rate,	dc->writeback_rate.rate << 9);
+		bch_hprint(dirty,	bcache_dev_sectors_dirty(&dc->disk) << 9);
 		bch_hprint(target,	dc->writeback_rate_target << 9);
+		bch_hprint(proportional,dc->writeback_rate_proportional << 9);
+		bch_hprint(derivative,	dc->writeback_rate_derivative << 9);
+		bch_hprint(change,	dc->writeback_rate_change << 9);
+
+		next_io = div64_s64(dc->writeback_rate.next - local_clock(),
+				    NSEC_PER_MSEC);
 
 		return sprintf(buf,
-			       "rate:\t\t%u\n"
-			       "change:\t\t%i\n"
+			       "rate:\t\t%s/sec\n"
 			       "dirty:\t\t%s\n"
+			       "target:\t\t%s\n"
+			       "proportional:\t%s\n"
 			       "derivative:\t%s\n"
-			       "target:\t\t%s\n",
-			       dc->writeback_rate.rate,
-			       dc->writeback_rate_change,
-			       dirty, derivative, target);
+			       "change:\t\t%s/sec\n"
+			       "next io:\t%llims\n",
+			       rate, dirty, target, proportional,
+			       derivative, change, next_io);
 	}
 
 	sysfs_hprint(dirty_data,
@@ -189,6 +198,7 @@ STORE(__cached_dev)
 	struct kobj_uevent_env *env;
 
 #define d_strtoul(var)		sysfs_strtoul(var, dc->var)
+#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
 #define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)
 
 	sysfs_strtoul(data_csum,	dc->disk.data_csum);
@@ -197,16 +207,15 @@ STORE(__cached_dev)
 	d_strtoul(writeback_metadata);
 	d_strtoul(writeback_running);
 	d_strtoul(writeback_delay);
-	sysfs_strtoul_clamp(writeback_rate,
-			    dc->writeback_rate.rate, 1, 1000000);
+
 	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
 
-	d_strtoul(writeback_rate_update_seconds);
+	sysfs_strtoul_clamp(writeback_rate,
+			    dc->writeback_rate.rate, 1, INT_MAX);
+
+	d_strtoul_nonzero(writeback_rate_update_seconds);
 	d_strtoul(writeback_rate_d_term);
-	d_strtoul(writeback_rate_p_term_inverse);
-	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
-			    dc->writeback_rate_p_term_inverse, 1, INT_MAX);
-	d_strtoul(writeback_rate_d_smooth);
+	d_strtoul_nonzero(writeback_rate_p_term_inverse);
 
 	d_strtoi_h(sequential_cutoff);
 	d_strtoi_h(readahead);
@@ -313,7 +322,6 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_writeback_rate_update_seconds,
 	&sysfs_writeback_rate_d_term,
 	&sysfs_writeback_rate_p_term_inverse,
-	&sysfs_writeback_rate_d_smooth,
 	&sysfs_writeback_rate_debug,
 	&sysfs_dirty_data,
 	&sysfs_stripe_size,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 462214eeacbe..bb37618e7664 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -209,7 +209,13 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
 	uint64_t now = local_clock();
 
-	d->next += div_u64(done, d->rate);
+	d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+
+	if (time_before64(now + NSEC_PER_SEC, d->next))
+		d->next = now + NSEC_PER_SEC;
+
+	if (time_after64(now - NSEC_PER_SEC * 2, d->next))
+		d->next = now - NSEC_PER_SEC * 2;
 
 	return time_after64(d->next, now)
 		? div_u64(d->next - now, NSEC_PER_SEC / HZ)
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 362c4b3f8b4a..1030c6020e98 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -110,7 +110,7 @@ do {									\
 	_r;								\
 })
 
-#define heap_peek(h)	((h)->size ? (h)->data[0] : NULL)
+#define heap_peek(h)	((h)->used ? (h)->data[0] : NULL)
 
 #define heap_full(h)	((h)->used == (h)->size)
 
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 99053b1251be..6c44fe059c27 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -30,38 +30,40 @@ static void __update_writeback_rate(struct cached_dev *dc)
 
 	/* PD controller */
 
-	int change = 0;
-	int64_t error;
 	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
 	int64_t derivative = dirty - dc->disk.sectors_dirty_last;
+	int64_t proportional = dirty - target;
+	int64_t change;
 
 	dc->disk.sectors_dirty_last = dirty;
 
-	derivative *= dc->writeback_rate_d_term;
-	derivative = clamp(derivative, -dirty, dirty);
+	/* Scale to sectors per second */
 
-	derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
-			      dc->writeback_rate_d_smooth, 0);
+	proportional *= dc->writeback_rate_update_seconds;
+	proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
 
-	/* Avoid divide by zero */
-	if (!target)
-		goto out;
+	derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
 
-	error = div64_s64((dirty + derivative - target) << 8, target);
+	derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
+			      (dc->writeback_rate_d_term /
+			       dc->writeback_rate_update_seconds) ?: 1, 0);
+
+	derivative *= dc->writeback_rate_d_term;
+	derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
 
-	change = div_s64((dc->writeback_rate.rate * error) >> 8,
-			 dc->writeback_rate_p_term_inverse);
+	change = proportional + derivative;
 
 	/* Don't increase writeback rate if the device isn't keeping up */
 	if (change > 0 &&
 	    time_after64(local_clock(),
-			 dc->writeback_rate.next + 10 * NSEC_PER_MSEC))
+			 dc->writeback_rate.next + NSEC_PER_MSEC))
 		change = 0;
 
 	dc->writeback_rate.rate =
-		clamp_t(int64_t, dc->writeback_rate.rate + change,
+		clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
 			1, NSEC_PER_MSEC);
-out:
+
+	dc->writeback_rate_proportional = proportional;
 	dc->writeback_rate_derivative = derivative;
 	dc->writeback_rate_change = change;
 	dc->writeback_rate_target = target;
@@ -87,15 +89,11 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
-	uint64_t ret;
-
 	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
 	    !dc->writeback_percent)
 		return 0;
 
-	ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
-
-	return min_t(uint64_t, ret, HZ);
+	return bch_next_delay(&dc->writeback_rate, sectors);
 }
 
 struct dirty_io {
@@ -241,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
 		if (KEY_START(&w->key) != dc->last_read ||
 		    jiffies_to_msecs(delay) > 50)
 			while (!kthread_should_stop() && delay)
-				delay = schedule_timeout_interruptible(delay);
+				delay = schedule_timeout_uninterruptible(delay);
 
 		dc->last_read	= KEY_OFFSET(&w->key);
 
@@ -438,7 +436,7 @@ static int bch_writeback_thread(void *arg)
 			while (delay &&
 			       !kthread_should_stop() &&
 			       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-				delay = schedule_timeout_interruptible(delay);
+				delay = schedule_timeout_uninterruptible(delay);
 		}
 	}
 
@@ -476,6 +474,8 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
 
 	bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
 			   sectors_dirty_init_fn, 0);
+
+	dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
 }
 
 int bch_cached_dev_writeback_init(struct cached_dev *dc)
@@ -490,18 +490,15 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_delay		= 30;
 	dc->writeback_rate.rate		= 1024;
 
-	dc->writeback_rate_update_seconds = 30;
-	dc->writeback_rate_d_term	= 16;
-	dc->writeback_rate_p_term_inverse = 64;
-	dc->writeback_rate_d_smooth	= 8;
+	dc->writeback_rate_update_seconds = 5;
+	dc->writeback_rate_d_term	= 30;
+	dc->writeback_rate_p_term_inverse = 6000;
 
 	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
 					      "bcache_writeback");
 	if (IS_ERR(dc->writeback_thread))
 		return PTR_ERR(dc->writeback_thread);
 
-	set_task_state(dc->writeback_thread, TASK_INTERRUPTIBLE);
-
 	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
 	schedule_delayed_work(&dc->writeback_rate_update,
 			      dc->writeback_rate_update_seconds * HZ);
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index a3e291d0df9a..6cb388e8fb7d 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -525,4 +525,5 @@ source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/mei/Kconfig"
 source "drivers/misc/vmw_vmci/Kconfig"
 source "drivers/misc/mic/Kconfig"
+source "drivers/misc/genwqe/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index f45473e68bf7..99b9424ce31d 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -53,3 +53,4 @@ obj-$(CONFIG_VMWARE_VMCI)	+= vmw_vmci/
 obj-$(CONFIG_LATTICE_ECP3_CONFIG)	+= lattice-ecp3-config.o
 obj-$(CONFIG_SRAM)		+= sram.o
 obj-y				+= mic/
+obj-$(CONFIG_GENWQE)		+= genwqe/
diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
index 0daadcf1ed7a..d3eee113baeb 100644
--- a/drivers/misc/ad525x_dpot.c
+++ b/drivers/misc/ad525x_dpot.c
@@ -641,7 +641,7 @@ static const struct attribute_group ad525x_group_commands = {
 	.attrs = ad525x_attributes_commands,
 };
 
-int ad_dpot_add_files(struct device *dev,
+static int ad_dpot_add_files(struct device *dev,
 		unsigned features, unsigned rdac)
 {
 	int err = sysfs_create_file(&dev->kobj,
@@ -666,7 +666,7 @@ int ad_dpot_add_files(struct device *dev,
 	return err;
 }
 
-inline void ad_dpot_remove_files(struct device *dev,
+static inline void ad_dpot_remove_files(struct device *dev,
 		unsigned features, unsigned rdac)
 {
 	sysfs_remove_file(&dev->kobj,
diff --git a/drivers/misc/bmp085-i2c.c b/drivers/misc/bmp085-i2c.c
index 3abfcecf8424..a7c16295b816 100644
--- a/drivers/misc/bmp085-i2c.c
+++ b/drivers/misc/bmp085-i2c.c
@@ -49,7 +49,7 @@ static int bmp085_i2c_probe(struct i2c_client *client,
 		return err;
 	}
 
-	return bmp085_probe(&client->dev, regmap);
+	return bmp085_probe(&client->dev, regmap, client->irq);
 }
 
 static int bmp085_i2c_remove(struct i2c_client *client)
diff --git a/drivers/misc/bmp085-spi.c b/drivers/misc/bmp085-spi.c
index d6a52659cf24..864ecac32373 100644
--- a/drivers/misc/bmp085-spi.c
+++ b/drivers/misc/bmp085-spi.c
@@ -41,7 +41,7 @@ static int bmp085_spi_probe(struct spi_device *client)
 		return err;
 	}
 
-	return bmp085_probe(&client->dev, regmap);
+	return bmp085_probe(&client->dev, regmap, client->irq);
 }
 
 static int bmp085_spi_remove(struct spi_device *client)
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
index 2704d885a9b3..820e53d0048f 100644
--- a/drivers/misc/bmp085.c
+++ b/drivers/misc/bmp085.c
@@ -49,9 +49,11 @@
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/delay.h>
 #include <linux/of.h>
 #include "bmp085.h"
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/gpio.h>
 
 #define BMP085_CHIP_ID			0x55
 #define BMP085_CALIBRATION_DATA_START	0xAA
@@ -84,8 +86,19 @@ struct bmp085_data {
 	unsigned long last_temp_measurement;
 	u8	chip_id;
 	s32	b6; /* calculated temperature correction coefficient */
+	int	irq;
+	struct	completion done;
 };
 
+static irqreturn_t bmp085_eoc_isr(int irq, void *devid)
+{
+	struct bmp085_data *data = devid;
+
+	complete(&data->done);
+
+	return IRQ_HANDLED;
+}
+
 static s32 bmp085_read_calibration_data(struct bmp085_data *data)
 {
 	u16 tmp[BMP085_CALIBRATION_DATA_LENGTH];
@@ -116,6 +129,9 @@ static s32 bmp085_update_raw_temperature(struct bmp085_data *data)
 	s32 status;
 
 	mutex_lock(&data->lock);
+
+	init_completion(&data->done);
+
 	status = regmap_write(data->regmap, BMP085_CTRL_REG,
 			      BMP085_TEMP_MEASUREMENT);
 	if (status < 0) {
@@ -123,7 +139,8 @@ static s32 bmp085_update_raw_temperature(struct bmp085_data *data)
 			"Error while requesting temperature measurement.\n");
 		goto exit;
 	}
-	msleep(BMP085_TEMP_CONVERSION_TIME);
+	wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
+					    BMP085_TEMP_CONVERSION_TIME));
 
 	status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
 				 &tmp, sizeof(tmp));
@@ -147,6 +164,9 @@ static s32 bmp085_update_raw_pressure(struct bmp085_data *data)
 	s32 status;
 
 	mutex_lock(&data->lock);
+
+	init_completion(&data->done);
+
 	status = regmap_write(data->regmap, BMP085_CTRL_REG,
 			BMP085_PRESSURE_MEASUREMENT +
 			(data->oversampling_setting << 6));
@@ -157,8 +177,8 @@ static s32 bmp085_update_raw_pressure(struct bmp085_data *data)
 	}
 
 	/* wait for the end of conversion */
-	msleep(2+(3 << data->oversampling_setting));
-
+	wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
+					2+(3 << data->oversampling_setting)));
 	/* copy data into a u32 (4 bytes), but skip the first byte. */
 	status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
 				 ((u8 *)&tmp)+1, 3);
@@ -420,7 +440,7 @@ struct regmap_config bmp085_regmap_config = {
 };
 EXPORT_SYMBOL_GPL(bmp085_regmap_config);
 
-int bmp085_probe(struct device *dev, struct regmap *regmap)
+int bmp085_probe(struct device *dev, struct regmap *regmap, int irq)
 {
 	struct bmp085_data *data;
 	int err = 0;
@@ -434,6 +454,15 @@ int bmp085_probe(struct device *dev, struct regmap *regmap)
 	dev_set_drvdata(dev, data);
 	data->dev = dev;
 	data->regmap = regmap;
+	data->irq = irq;
+
+	if (data->irq > 0) {
+		err = devm_request_irq(dev, data->irq, bmp085_eoc_isr,
+					      IRQF_TRIGGER_RISING, "bmp085",
+					      data);
+		if (err < 0)
+			goto exit_free;
+	}
 
 	/* Initialize the BMP085 chip */
 	err = bmp085_init_client(data);
diff --git a/drivers/misc/bmp085.h b/drivers/misc/bmp085.h
index 2b8f615bca92..8b8e3b1f5ca5 100644
--- a/drivers/misc/bmp085.h
+++ b/drivers/misc/bmp085.h
@@ -26,7 +26,7 @@
 
 extern struct regmap_config bmp085_regmap_config;
 
-int bmp085_probe(struct device *dev, struct regmap *regmap);
+int bmp085_probe(struct device *dev, struct regmap *regmap, int irq);
 int bmp085_remove(struct device *dev);
 int bmp085_detect(struct device *dev);
 
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
index 3a015abb444a..78e55b501c94 100644
--- a/drivers/misc/eeprom/eeprom_93xx46.c
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -378,7 +378,6 @@ static int eeprom_93xx46_remove(struct spi_device *spi)
 		device_remove_file(&spi->dev, &dev_attr_erase);
 
 	sysfs_remove_bin_file(&spi->dev.kobj, &edev->bin);
-	spi_set_drvdata(spi, NULL);
 	kfree(edev);
 	return 0;
 }
diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig
new file mode 100644
index 000000000000..6069d8cd79d7
--- /dev/null
+++ b/drivers/misc/genwqe/Kconfig
@@ -0,0 +1,13 @@
+#
+# IBM Accelerator Family 'GenWQE'
+#
+
+menuconfig GENWQE
+	tristate "GenWQE PCIe Accelerator"
+	depends on PCI && 64BIT
+	select CRC_ITU_T
+	default n
+	help
+	  Enables PCIe card driver for IBM GenWQE accelerators.
+	  The user-space interface is described in
+	  include/linux/genwqe/genwqe_card.h.
diff --git a/drivers/misc/genwqe/Makefile b/drivers/misc/genwqe/Makefile
new file mode 100644
index 000000000000..98a2b4f0b18b
--- /dev/null
+++ b/drivers/misc/genwqe/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for GenWQE driver
+#
+
+obj-$(CONFIG_GENWQE) := genwqe_card.o
+genwqe_card-objs := card_base.o card_dev.o card_ddcb.o card_sysfs.o \
+	card_debugfs.o card_utils.o
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
new file mode 100644
index 000000000000..74d51c9bb858
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.c
@@ -0,0 +1,1205 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Module initialization and PCIe setup. Card health monitoring and
+ * recovery functionality. Character device creation and deletion are
+ * controlled from here.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/err.h>
+#include <linux/aer.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <linux/log2.h>
+#include <linux/genwqe/genwqe_card.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>");
+MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
+MODULE_AUTHOR("Michal Jung <mijung@de.ibm.com>");
+
+MODULE_DESCRIPTION("GenWQE Card");
+MODULE_VERSION(DRV_VERS_STRING);
+MODULE_LICENSE("GPL");
+
+static char genwqe_driver_name[] = GENWQE_DEVNAME;
+static struct class *class_genwqe;
+static struct dentry *debugfs_genwqe;
+static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX];
+
+/* PCI structure for identifying device by PCI vendor and device ID */
+static DEFINE_PCI_DEVICE_TABLE(genwqe_device_table) = {
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Initial SR-IOV bring-up image */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
+	  .device      = 0x0000,  /* VF Device ID */
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Fixed up image */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
+	  .device      = 0x0000,  /* VF Device ID */
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Even one more ... */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
+	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ 0, }			/* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, genwqe_device_table);
+
+/**
+ * genwqe_dev_alloc() - Create and prepare a new card descriptor
+ *
+ * Return: Pointer to card descriptor, or ERR_PTR(err) on error
+ */
+static struct genwqe_dev *genwqe_dev_alloc(void)
+{
+	unsigned int i = 0, j;
+	struct genwqe_dev *cd;
+
+	for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
+		if (genwqe_devices[i] == NULL)
+			break;
+	}
+	if (i >= GENWQE_CARD_NO_MAX)
+		return ERR_PTR(-ENODEV);
+
+	cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
+	if (!cd)
+		return ERR_PTR(-ENOMEM);
+
+	cd->card_idx = i;
+	cd->class_genwqe = class_genwqe;
+	cd->debugfs_genwqe = debugfs_genwqe;
+
+	init_waitqueue_head(&cd->queue_waitq);
+
+	spin_lock_init(&cd->file_lock);
+	INIT_LIST_HEAD(&cd->file_list);
+
+	cd->card_state = GENWQE_CARD_UNUSED;
+	spin_lock_init(&cd->print_lock);
+
+	cd->ddcb_software_timeout = genwqe_ddcb_software_timeout;
+	cd->kill_timeout = genwqe_kill_timeout;
+
+	for (j = 0; j < GENWQE_MAX_VFS; j++)
+		cd->vf_jobtimeout_msec[j] = genwqe_vf_jobtimeout_msec;
+
+	genwqe_devices[i] = cd;
+	return cd;
+}
+
+static void genwqe_dev_free(struct genwqe_dev *cd)
+{
+	if (!cd)
+		return;
+
+	genwqe_devices[cd->card_idx] = NULL;
+	kfree(cd);
+}
+
+/**
+ * genwqe_bus_reset() - Card recovery
+ *
+ * pci_reset_function() will recover the device and ensure that the
+ * registers are accessible again when it completes with success. If
+ * not, the card will stay dead and registers will still be
+ * inaccessible.
+ */
+static int genwqe_bus_reset(struct genwqe_dev *cd)
+{
+	int bars, rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	void __iomem *mmio;
+
+	if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
+		return -EIO;
+
+	mmio = cd->mmio;
+	cd->mmio = NULL;
+	pci_iounmap(pci_dev, mmio);
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	pci_release_selected_regions(pci_dev, bars);
+
+	/*
+	 * Firmware/BIOS might change memory mapping during bus reset.
+	 * Settings like enable bus-mastering, ... are backed up and
+	 * restored by the pci_reset_function().
+	 */
+	dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
+	rc = pci_reset_function(pci_dev);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed reset func (rc %d)\n", __func__, rc);
+		return rc;
+	}
+	dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);
+
+	/*
+	 * Here is the right spot to clear the register read
+	 * failure. pci_bus_reset() does this job in real systems.
+	 */
+	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+			    GENWQE_INJECT_GFIR_FATAL |
+			    GENWQE_INJECT_GFIR_INFO);
+
+	rc = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: request bars failed (%d)\n", __func__, rc);
+		return -EIO;
+	}
+
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Hardware circumvention section. Certain bitstreams in our test-lab
+ * had different kinds of problems. Here is where we adjust those
+ * bitstreams to function well with this version of our device driver.
+ *
+ * These circumventions are applied to the physical function only.
+ * The magical numbers below are identifying development/manufacturing
+ * versions of the bitstream used on the card.
+ *
+ * Turn off error reporting for old/manufacturing images.
+ */
+
+bool genwqe_need_err_masking(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+static void genwqe_tweak_hardware(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/* Mask FIRs for development images */
+	if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
+	    ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
+		dev_warn(&pci_dev->dev,
+			 "FIRs masked due to bitstream %016llx.%016llx\n",
+			 cd->slu_unitcfg, cd->app_unitcfg);
+
+		__genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
+				0xFFFFFFFFFFFFFFFFull);
+
+		__genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
+				0x0000000000000000ull);
+	}
+}
+
+/**
+ * genwqe_recovery_on_fatal_gfir_required() - Version dependent actions
+ *
+ * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
+ * be ignored. This is e.g. true for the bitstream we gave to the card
+ * manufacturer, but also for some old bitstreams we released to our
+ * test-lab.
+ */
+int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
+}
+
+int genwqe_flash_readback_fails(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+/**
+ * genwqe_T_psec() - Calculate PF/VF timeout register content
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specify the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ */
+/* T = 1/f */
+static int genwqe_T_psec(struct genwqe_dev *cd)
+{
+	u16 speed;	/* 1/f -> 250,  200,  166,  175 */
+	static const int T[] = { 4000, 5000, 6000, 5714 };
+
+	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+	if (speed >= ARRAY_SIZE(T))
+		return -1;	/* illegal value */
+
+	return T[speed];
+}
+
+/**
+ * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
+ *
+ * Do this _after_ card_reset() is called. Otherwise the values will
+ * vanish. The settings need to be done when the queues are inactive.
+ *
+ * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
+ * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
+ */
+static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
+{
+	u32 T = genwqe_T_psec(cd);
+	u64 x;
+
+	if (genwqe_pf_jobtimeout_msec == 0)
+		return false;
+
+	/* PF: large value needed, flash update 2sec per block */
+	x = ilog2(genwqe_pf_jobtimeout_msec *
+		  16000000000uL/(T * 15)) - 10;
+
+	genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+			  0xff00 | (x & 0xff), 0);
+	return true;
+}
+
+/**
+ * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution
+ */
+static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	unsigned int vf;
+	u32 T = genwqe_T_psec(cd);
+	u64 x;
+
+	for (vf = 0; vf < pci_sriov_get_totalvfs(pci_dev); vf++) {
+
+		if (cd->vf_jobtimeout_msec[vf] == 0)
+			continue;
+
+		x = ilog2(cd->vf_jobtimeout_msec[vf] *
+			  16000000000uL/(T * 15)) - 10;
+
+		genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+				  0xff00 | (x & 0xff), vf + 1);
+	}
+	return true;
+}
+
+static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd)
+{
+	unsigned int type, e = 0;
+
+	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+		switch (type) {
+		case GENWQE_DBG_UNIT0:
+			e = genwqe_ffdc_buff_size(cd, 0);
+			break;
+		case GENWQE_DBG_UNIT1:
+			e = genwqe_ffdc_buff_size(cd, 1);
+			break;
+		case GENWQE_DBG_UNIT2:
+			e = genwqe_ffdc_buff_size(cd, 2);
+			break;
+		case GENWQE_DBG_REGS:
+			e = GENWQE_FFDC_REGS;
+			break;
+		}
+
+		/* currently support only the debug units mentioned here */
+		cd->ffdc[type].entries = e;
+		cd->ffdc[type].regs = kmalloc(e * sizeof(struct genwqe_reg),
+					      GFP_KERNEL);
+		/*
+		 * regs == NULL is ok, the using code treats this as no regs,
+		 * Printing warning is ok in this case.
+		 */
+	}
+	return 0;
+}
+
+static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd)
+{
+	unsigned int type;
+
+	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+		kfree(cd->ffdc[type].regs);
+		cd->ffdc[type].regs = NULL;
+	}
+}
+
+static int genwqe_read_ids(struct genwqe_dev *cd)
+{
+	int err = 0;
+	int slu_id;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) {
+		dev_err(&pci_dev->dev,
+			"err: SLUID=%016llx\n", cd->slu_unitcfg);
+		err = -EIO;
+		goto out_err;
+	}
+
+	slu_id = genwqe_get_slu_id(cd);
+	if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) {
+		dev_err(&pci_dev->dev,
+			"err: incompatible SLU Architecture %u\n", slu_id);
+		err = -ENOENT;
+		goto out_err;
+	}
+
+	cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+	if (cd->app_unitcfg == IO_ILLEGAL_VALUE) {
+		dev_err(&pci_dev->dev,
+			"err: APPID=%016llx\n", cd->app_unitcfg);
+		err = -EIO;
+		goto out_err;
+	}
+	genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name));
+
+	/*
+	 * Is access to all registers possible? If we are a VF the
+	 * answer is obvious. If we run fully virtualized, we need to
+	 * check if we can access all registers. If we do not have
+	 * full access we will cause an UR and some informational FIRs
+	 * in the PF, but that should not harm.
+	 */
+	if (pci_dev->is_virtfn)
+		cd->is_privileged = 0;
+	else
+		cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+				     != IO_ILLEGAL_VALUE);
+
+ out_err:
+	return err;
+}
+
+static int genwqe_start(struct genwqe_dev *cd)
+{
+	int err;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	err = genwqe_read_ids(cd);
+	if (err)
+		return err;
+
+	if (genwqe_is_privileged(cd)) {
+		/* do this after the tweaks. alloc fail is acceptable */
+		genwqe_ffdc_buffs_alloc(cd);
+		genwqe_stop_traps(cd);
+
+		/* Collect registers e.g. FIRs, UNITIDs, traces ... */
+		genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
+				      cd->ffdc[GENWQE_DBG_REGS].entries, 0);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
+				      cd->ffdc[GENWQE_DBG_UNIT0].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT0].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
+				      cd->ffdc[GENWQE_DBG_UNIT1].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT1].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
+				      cd->ffdc[GENWQE_DBG_UNIT2].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT2].entries);
+
+		genwqe_start_traps(cd);
+
+		if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
+			dev_warn(&pci_dev->dev,
+				 "[%s] chip reload/recovery!\n", __func__);
+
+			/*
+			 * Stealth Mode: Reload chip on either hot
+			 * reset or PERST.
+			 */
+			cd->softreset = 0x7Cull;
+			__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+				       cd->softreset);
+
+			err = genwqe_bus_reset(cd);
+			if (err != 0) {
+				dev_err(&pci_dev->dev,
+					"[%s] err: bus reset failed!\n",
+					__func__);
+				goto out;
+			}
+
+			/*
+			 * Re-read the IDs because
+			 * it could happen that the bitstream load
+			 * failed!
+			 */
+			err = genwqe_read_ids(cd);
+			if (err)
+				goto out;
+		}
+	}
+
+	err = genwqe_setup_service_layer(cd);  /* does a reset to the card */
+	if (err != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: could not setup servicelayer!\n", __func__);
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (genwqe_is_privileged(cd)) {	 /* code is running _after_ reset */
+		genwqe_tweak_hardware(cd);
+
+		genwqe_setup_pf_jtimer(cd);
+		genwqe_setup_vf_jtimer(cd);
+	}
+
+	err = genwqe_device_create(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: chdev init failed! (err=%d)\n", err);
+		goto out_release_service_layer;
+	}
+	return 0;
+
+ out_release_service_layer:
+	genwqe_release_service_layer(cd);
+ out:
+	if (genwqe_is_privileged(cd))
+		genwqe_ffdc_buffs_free(cd);
+	return -EIO;
+}
+
+/**
+ * genwqe_stop() - Stop card operation
+ *
+ * Recovery notes:
+ *   As long as genwqe_thread runs we might access registers during
+ *   error data capture. Same is with the genwqe_health_thread.
+ *   When genwqe_bus_reset() fails this function might be called two times:
+ *   first by the genwqe_health_thread() and later by genwqe_remove() to
+ *   unbind the device. We must be able to survive that.
+ *
+ * This function must be robust enough to be called twice.
+ */
+static int genwqe_stop(struct genwqe_dev *cd)
+{
+	genwqe_finish_queue(cd);	    /* no register access */
+	genwqe_device_remove(cd);	    /* device removed, procs killed */
+	genwqe_release_service_layer(cd);   /* here genwqe_thread is stopped */
+
+	if (genwqe_is_privileged(cd)) {
+		pci_disable_sriov(cd->pci_dev);	/* access pci config space */
+		genwqe_ffdc_buffs_free(cd);
+	}
+
+	return 0;
+}
+
+/**
+ * genwqe_recover_card() - Try to recover the card if it is possible
+ *
+ * If fatal_err is set no register access is possible anymore. It is
+ * likely that genwqe_start fails in that situation. Proper error
+ * handling is required in this case.
+ *
+ * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
+ * and later genwqe_probe() for all virtual functions.
+ */
+static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	genwqe_stop(cd);
+
+	/*
+	 * Make sure chip is not reloaded to maintain FFDC. Write SLU
+	 * Reset Register, CPLDReset field to 0.
+	 */
+	if (!fatal_err) {
+		cd->softreset = 0x70ull;
+		__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+	}
+
+	rc = genwqe_bus_reset(cd);
+	if (rc != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: card recovery impossible!\n", __func__);
+		return rc;
+	}
+
+	rc = genwqe_start(cd);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed to launch device!\n", __func__);
+		return rc;
+	}
+	return 0;
+}
+
+static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
+{
+	*gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	return (*gfir & GFIR_ERR_TRIGGER) &&
+		genwqe_recovery_on_fatal_gfir_required(cd);
+}
+
+/**
+ * genwqe_fir_checking() - Check the fault isolation registers of the card
+ *
+ * Whether this code works can be tried out with the genwqe_poke tool:
+ *   sudo ./tools/genwqe_poke 0x8 0xfefefefefef
+ *
+ * Now the relevant FIRs/sFIRs should be printed out and the driver should
+ * invoke recovery (devices are removed and re-added).
+ */
+static u64 genwqe_fir_checking(struct genwqe_dev *cd)
+{
+	int j, iterations = 0;
+	u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
+	u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+ healthMonitor:
+	iterations++;
+	if (iterations > 16) {
+		dev_err(&pci_dev->dev, "* exit looping after %d times\n",
+			iterations);
+		goto fatal_error;
+	}
+
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir != 0x0)
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
+				    IO_SLC_CFGREG_GFIR, gfir);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	/*
+	 * Avoid printing when to GFIR bit is on prevents contignous
+	 * printout e.g. for the following bug:
+	 *   FIR set without a 2ndary FIR/FIR cannot be cleared
+	 * Comment out the following if to get the prints:
+	 */
+	if (gfir == 0)
+		return 0;
+
+	gfir_masked = gfir & GFIR_ERR_TRIGGER;  /* fatal errors */
+
+	for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */
+
+		/* read the primary FIR (pfir) */
+		fir_addr = (uid << 24) + 0x08;
+		fir = __genwqe_readq(cd, fir_addr);
+		if (fir == 0x0)
+			continue;  /* no error in this unit */
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
+		if (fir == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/* read primary FEC */
+		fec_addr = (uid << 24) + 0x18;
+		fec = __genwqe_readq(cd, fec_addr);
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
+		if (fec == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {
+
+			/* secondary fir empty, skip it */
+			if ((fir & mask) == 0x0)
+				continue;
+
+			sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+			sfir = __genwqe_readq(cd, sfir_addr);
+
+			if (sfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfir_addr, sfir);
+
+			sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
+			sfec = __genwqe_readq(cd, sfec_addr);
+
+			if (sfec == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfec_addr, sfec);
+
+			gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+			if (gfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+
+			/* gfir turned on during routine! get out and
+			   start over. */
+			if ((gfir_masked == 0x0) &&
+			    (gfir & GFIR_ERR_TRIGGER)) {
+				goto healthMonitor;
+			}
+
+			/* do not clear if we entered with a fatal gfir */
+			if (gfir_masked == 0x0) {
+
+				/* NEW clear by mask the logged bits */
+				sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+				__genwqe_writeq(cd, sfir_addr, sfir);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing  2ndary FIR 0x%08x "
+					"with 0x%016llx\n", sfir_addr, sfir);
+
+				/*
+				 * note, these cannot be error-Firs
+				 * since gfir_masked is 0 after sfir
+				 * was read. Also, it is safe to do
+				 * this write if sfir=0. Still need to
+				 * clear the primary. This just means
+				 * there is no secondary FIR.
+				 */
+
+				/* clear by mask the logged bit. */
+				fir_clr_addr = (uid << 24) + 0x10;
+				__genwqe_writeq(cd, fir_clr_addr, mask);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing primary FIR 0x%08x "
+					"with 0x%016llx\n", fir_clr_addr,
+					mask);
+			}
+		}
+	}
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
+		/*
+		 * Check once more that it didn't go on after all the
+		 * FIRS were cleared.
+		 */
+		dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
+			iterations);
+		goto healthMonitor;
+	}
+	return gfir_masked;
+
+ fatal_error:
+	return IO_ILLEGAL_VALUE;
+}
+
+/**
+ * genwqe_health_thread() - Health checking thread
+ *
+ * This thread is only started for the PF of the card.
+ *
+ * This thread monitors the health of the card. A critical situation
+ * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
+ * this case we need to be recovered from outside. Writing to
+ * registers will very likely not work either.
+ *
+ * This thread must only exit if kthread_should_stop() becomes true.
+ *
+ * Condition for the health-thread to trigger:
+ *   a) when a kthread_stop() request comes in or
+ *   b) a critical GFIR occurred
+ *
+ * Informational GFIRs are checked and potentially printed every
+ * genwqe_health_check_interval seconds.
+ */
+static int genwqe_health_thread(void *data)
+{
+	int rc, should_stop = 0;
+	struct genwqe_dev *cd = data;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;
+
+	while (!kthread_should_stop()) {
+		rc = wait_event_interruptible_timeout(cd->health_waitq,
+			 (genwqe_health_check_cond(cd, &gfir) ||
+			  (should_stop = kthread_should_stop())),
+				genwqe_health_check_interval * HZ);
+
+		if (should_stop)
+			break;
+
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] GFIR=%016llx\n", __func__, gfir);
+			goto fatal_error;
+		}
+
+		slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+		if (slu_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] SLU_UNITCFG=%016llx\n",
+				__func__, slu_unitcfg);
+			goto fatal_error;
+		}
+
+		app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+		if (app_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] APP_UNITCFG=%016llx\n",
+				__func__, app_unitcfg);
+			goto fatal_error;
+		}
+
+		gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] %s: GFIR=%016llx\n", __func__,
+				(gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
+				gfir);
+			goto fatal_error;
+		}
+
+		gfir_masked = genwqe_fir_checking(cd);
+		if (gfir_masked == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/*
+		 * GFIR ErrorTrigger bits set => reset the card!
+		 * Never do this for old/manufacturing images!
+		 */
+		if ((gfir_masked) && !cd->skip_recovery &&
+		    genwqe_recovery_on_fatal_gfir_required(cd)) {
+
+			cd->card_state = GENWQE_CARD_FATAL_ERROR;
+
+			rc = genwqe_recover_card(cd, 0);
+			if (rc < 0) {
+				/* FIXME Card is unusable and needs unbind! */
+				goto fatal_error;
+			}
+		}
+
+		cd->last_gfir = gfir;
+		cond_resched();
+	}
+
+	return 0;
+
+ fatal_error:
+	dev_err(&pci_dev->dev,
+		"[%s] card unusable. Please trigger unbind!\n", __func__);
+
+	/* Bring down logical devices to inform user space via udev remove. */
+	cd->card_state = GENWQE_CARD_FATAL_ERROR;
+	genwqe_stop(cd);
+
+	/* genwqe_bus_reset() failed. Now wait for genwqe_remove(). */
+	while (!kthread_should_stop())
+		cond_resched();
+
+	return -EIO;
+}
+
+static int genwqe_health_check_start(struct genwqe_dev *cd)
+{
+	int rc;
+
+	if (genwqe_health_check_interval <= 0)
+		return 0;	/* valid for disabling the service */
+
+	/* cd->health_waitq is not initialized here anymore: the */
+	/* init_waitqueue_head() call was moved before request_irq(). */
+
+	cd->health_thread = kthread_run(genwqe_health_thread, cd,
+					GENWQE_DEVNAME "%d_health",
+					cd->card_idx);
+	if (IS_ERR(cd->health_thread)) {
+		rc = PTR_ERR(cd->health_thread);
+		cd->health_thread = NULL;
+		return rc;
+	}
+	return 0;
+}
+
+static int genwqe_health_thread_running(struct genwqe_dev *cd)
+{
+	return cd->health_thread != NULL;
+}
+
+/* Stop the health thread; -EIO if it was never started, else 0. */
+static int genwqe_health_check_stop(struct genwqe_dev *cd)
+{
+	if (!genwqe_health_thread_running(cd))
+		return -EIO;
+
+	/* The thread's exit code is deliberately not propagated. */
+	kthread_stop(cd->health_thread);
+	cd->health_thread = NULL;
+	return 0;
+}
+
+/**
+ * genwqe_pci_setup() - Allocate PCIe related resources for our card
+ */
+static int genwqe_pci_setup(struct genwqe_dev *cd)
+{
+	int err, bars;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	err = pci_enable_device_mem(pci_dev);
+	if (err) {
+		dev_err(&pci_dev->dev,
+			"err: failed to enable pci memory (err=%d)\n", err);
+		goto err_out;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	err = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
+	if (err) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: request bars failed (%d)\n", __func__, err);
+		err = -EIO;
+		goto err_disable_device;
+	}
+
+	/* check for 64-bit DMA address supported (DAC) */
+	if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) {
+		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64));
+		if (err) {
+			dev_err(&pci_dev->dev,
+				"err: DMA64 consistent mask error\n");
+			err = -EIO;
+			goto out_release_resources;
+		}
+	/* check for 32-bit DMA address supported (SAC) */
+	} else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pci_dev->dev,
+				"err: DMA32 consistent mask error\n");
+			err = -EIO;
+			goto out_release_resources;
+		}
+	} else {
+		dev_err(&pci_dev->dev,
+			"err: neither DMA32 nor DMA64 supported\n");
+		err = -EIO;
+		goto out_release_resources;
+	}
+
+	pci_set_master(pci_dev);
+	pci_enable_pcie_error_reporting(pci_dev);
+
+	/* request complete BAR-0 space (length = 0) */
+	cd->mmio_len = pci_resource_len(pci_dev, 0);
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		err = -ENOMEM;
+		goto out_release_resources;
+	}
+
+	cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
+
+	err = genwqe_read_ids(cd);
+	if (err)
+		goto out_iounmap;
+
+	return 0;
+
+ out_iounmap:
+	pci_iounmap(pci_dev, cd->mmio);
+ out_release_resources:
+	pci_release_selected_regions(pci_dev, bars);
+ err_disable_device:
+	pci_disable_device(pci_dev);
+ err_out:
+	return err;
+}
+
+/**
+ * genwqe_pci_remove() - Free PCIe related resources for our card
+ */
+static void genwqe_pci_remove(struct genwqe_dev *cd)
+{
+	int bars;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (cd->mmio)
+		pci_iounmap(pci_dev, cd->mmio);
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	pci_release_selected_regions(pci_dev, bars);
+	pci_disable_device(pci_dev);
+}
+
+/**
+ * genwqe_probe() - Device initialization
+ * @pci_dev:	PCI device information struct
+ *
+ * Callable for multiple cards. This function is called on bind.
+ *
+ * Return: 0 if succeeded, < 0 when failed
+ */
+static int genwqe_probe(struct pci_dev *pci_dev,
+			const struct pci_device_id *id)
+{
+	int err;
+	struct genwqe_dev *cd;
+
+	genwqe_init_crc32();
+
+	cd = genwqe_dev_alloc();
+	if (IS_ERR(cd)) {
+		dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
+			(int)PTR_ERR(cd));
+		return PTR_ERR(cd);
+	}
+
+	dev_set_drvdata(&pci_dev->dev, cd);
+	cd->pci_dev = pci_dev;
+
+	err = genwqe_pci_setup(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: problems with PCI setup (err=%d)\n", err);
+		goto out_free_dev;
+	}
+
+	err = genwqe_start(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: cannot start card services! (err=%d)\n", err);
+		goto out_pci_remove;
+	}
+
+	if (genwqe_is_privileged(cd)) {
+		err = genwqe_health_check_start(cd);
+		if (err < 0) {
+			dev_err(&pci_dev->dev,
+				"err: cannot start health checking! "
+				"(err=%d)\n", err);
+			goto out_stop_services;
+		}
+	}
+	return 0;
+
+ out_stop_services:
+	genwqe_stop(cd);
+ out_pci_remove:
+	genwqe_pci_remove(cd);
+ out_free_dev:
+	genwqe_dev_free(cd);
+	return err;
+}
+
+/**
+ * genwqe_remove() - Called when device is removed (hot-pluggable)
+ *
+ * Also called when the driver is unloaded or when an unbind is done.
+ */
+static void genwqe_remove(struct pci_dev *pci_dev)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+	genwqe_health_check_stop(cd);
+
+	/*
+	 * genwqe_stop() must survive if it is called twice
+	 * sequentially. This happens when the health thread calls it
+	 * and fails on genwqe_bus_reset().
+	 */
+	genwqe_stop(cd);
+	genwqe_pci_remove(cd);
+	genwqe_dev_free(cd);
+}
+
+/*
+ * genwqe_err_error_detected() - Error detection callback
+ *
+ * Called by the PCI subsystem whenever a PCI bus error is detected.
+ * Maps the channel @state to the recovery step the PCI core should take.
+ */
+static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
+						 enum pci_channel_state state)
+{
+	struct genwqe_dev *cd;
+
+	/* Validate pci_dev before it is dereferenced for logging. */
+	if (pci_dev == NULL)
+		return PCI_ERS_RESULT_NEED_RESET;
+
+	dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
+	cd = dev_get_drvdata(&pci_dev->dev);
+	if (cd == NULL)
+		return PCI_ERS_RESULT_NEED_RESET;
+
+	switch (state) {
+	case pci_channel_io_normal:
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	case pci_channel_io_frozen:
+		return PCI_ERS_RESULT_NEED_RESET;
+	case pci_channel_io_perm_failure:
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
+{
+	return PCI_ERS_RESULT_NONE;
+}
+
+static void genwqe_err_resume(struct pci_dev *dev)
+{
+}
+
+/* sriov_configure hook: enable @numvfs VFs, or disable SR-IOV for 0. */
+static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+	int rc;
+	struct genwqe_dev *cd = dev_get_drvdata(&dev->dev);
+
+	if (numvfs > 0) {
+		genwqe_setup_vf_jtimer(cd);
+		rc = pci_enable_sriov(dev, numvfs);
+		return (rc < 0) ? rc : numvfs;	/* propagate enable failure */
+	}
+	if (numvfs == 0)
+		pci_disable_sriov(dev);
+	return 0;
+}
+
+static struct pci_error_handlers genwqe_err_handler = {
+	.error_detected = genwqe_err_error_detected,
+	.mmio_enabled	= genwqe_err_result_none,
+	.link_reset	= genwqe_err_result_none,
+	.slot_reset	= genwqe_err_result_none,
+	.resume		= genwqe_err_resume,
+};
+
+static struct pci_driver genwqe_driver = {
+	.name	  = genwqe_driver_name,
+	.id_table = genwqe_device_table,
+	.probe	  = genwqe_probe,
+	.remove	  = genwqe_remove,
+	.sriov_configure = genwqe_sriov_configure,
+	.err_handler = &genwqe_err_handler,
+};
+
+/**
+ * genwqe_init_module() - Driver registration and initialization
+ */
+static int __init genwqe_init_module(void)
+{
+	int rc;
+
+	class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME);
+	if (IS_ERR(class_genwqe)) {
+		pr_err("[%s] create class failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
+	if (!debugfs_genwqe) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	rc = pci_register_driver(&genwqe_driver);
+	if (rc != 0) {
+		pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc);
+		goto err_out0;
+	}
+
+	return rc;
+
+ err_out0:
+	debugfs_remove(debugfs_genwqe);
+ err_out:
+	class_destroy(class_genwqe);
+	return rc;
+}
+
+/**
+ * genwqe_exit_module() - Driver exit
+ */
+static void __exit genwqe_exit_module(void)
+{
+	pci_unregister_driver(&genwqe_driver);
+	debugfs_remove(debugfs_genwqe);
+	class_destroy(class_genwqe);
+}
+
+module_init(genwqe_init_module);
+module_exit(genwqe_exit_module);
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
new file mode 100644
index 000000000000..5e4dbd21f89a
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.h
@@ -0,0 +1,557 @@
+#ifndef __CARD_BASE_H__
+#define __CARD_BASE_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Interfaces within the GenWQE module. Defines genwqe_card and
+ * ddcb_queue as well as ddcb_requ.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/stringify.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/version.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <linux/genwqe/genwqe_card.h>
+#include "genwqe_driver.h"
+
+#define GENWQE_MSI_IRQS			4  /* Just one supported, no MSIx */
+#define GENWQE_FLAG_MSI_ENABLED		(1 << 0)
+
+#define GENWQE_MAX_VFS			15 /* maximum 15 VFs are possible */
+#define GENWQE_MAX_FUNCS		16 /* 1 PF and 15 VFs */
+#define GENWQE_CARD_NO_MAX		(16 * GENWQE_MAX_FUNCS)
+
+/* Compile parameters, some of them appear in debugfs for later adjustment */
+#define genwqe_ddcb_max			32 /* DDCBs on the work-queue */
+#define genwqe_polling_enabled		0  /* in case of irqs not working */
+#define genwqe_ddcb_software_timeout	10 /* timeout per DDCB in seconds */
+#define genwqe_kill_timeout		8  /* time until process gets killed */
+#define genwqe_vf_jobtimeout_msec	250  /* 250 msec */
+#define genwqe_pf_jobtimeout_msec	8000 /* 8 sec should be ok */
+#define genwqe_health_check_interval	4 /* <= 0: disabled */
+
+/* Sysfs attribute groups used when we create the genwqe device */
+extern const struct attribute_group *genwqe_attribute_groups[];
+
+/*
+ * Config space for Genwqe5 A7:
+ * 00:[14 10 4b 04]40 00 10 00[00 00 00 12]00 00 00 00
+ * 10: 0c 00 00 f0 07 3c 00 00 00 00 00 00 00 00 00 00
+ * 20: 00 00 00 00 00 00 00 00 00 00 00 00[14 10 4b 04]
+ * 30: 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00
+ */
+#define PCI_DEVICE_GENWQE		0x044b /* Genwqe DeviceID */
+
+#define PCI_SUBSYSTEM_ID_GENWQE5	0x035f /* Genwqe A5 Subsystem-ID */
+#define PCI_SUBSYSTEM_ID_GENWQE5_NEW	0x044b /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5		0x1200 /* UNKNOWN */
+
+#define PCI_SUBVENDOR_ID_IBM_SRIOV	0x0000
+#define PCI_SUBSYSTEM_ID_GENWQE5_SRIOV	0x0000 /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5_SRIOV	0x1200 /* UNKNOWN */
+
+#define	GENWQE_SLU_ARCH_REQ		2 /* Required SLU architecture level */
+
+/**
+ * struct genwqe_reg - Genwqe data dump functionality
+ */
+struct genwqe_reg {
+	u32 addr;
+	u32 idx;
+	u64 val;
+};
+
+/*
+ * enum genwqe_dbg_type - Specify chip unit to dump/debug
+ */
+enum genwqe_dbg_type {
+	GENWQE_DBG_UNIT0 = 0,  /* captured before prev errs cleared */
+	GENWQE_DBG_UNIT1 = 1,
+	GENWQE_DBG_UNIT2 = 2,
+	GENWQE_DBG_UNIT3 = 3,
+	GENWQE_DBG_UNIT4 = 4,
+	GENWQE_DBG_UNIT5 = 5,
+	GENWQE_DBG_UNIT6 = 6,
+	GENWQE_DBG_UNIT7 = 7,
+	GENWQE_DBG_REGS  = 8,
+	GENWQE_DBG_DMA   = 9,
+	GENWQE_DBG_UNITS = 10, /* max number of possible debug units  */
+};
+
+/* Software error injection to simulate card failures */
+#define GENWQE_INJECT_HARDWARE_FAILURE	0x00000001 /* injects -1 reg reads */
+#define GENWQE_INJECT_BUS_RESET_FAILURE 0x00000002 /* pci_bus_reset fail */
+#define GENWQE_INJECT_GFIR_FATAL	0x00000004 /* GFIR = 0x0000ffff */
+#define GENWQE_INJECT_GFIR_INFO		0x00000008 /* GFIR = 0xffff0000 */
+
+/*
+ * Genwqe card description and management data.
+ *
+ * Error-handling in case of card malfunction
+ * ------------------------------------------
+ *
+ * If the card is detected to be defective the outside environment
+ * will cause the PCI layer to call deinit (the cleanup function for
+ * probe). This is the same effect like doing a unbind/bind operation
+ * on the card.
+ *
+ * The genwqe card driver implements a health checking thread which
+ * verifies the card function. If this detects a problem the cards
+ * device is being shutdown and restarted again, along with a reset of
+ * the card and queue.
+ *
+ * All functions accessing the card device return either -EIO or -ENODEV
+ * code to indicate the malfunction to the user. The user has to close
+ * the file descriptor and open a new one, once the card becomes
+ * available again.
+ *
+ * If the open file descriptor is set up to receive SIGIO, the signal is
+ * generated for the application, which has to provide a handler to
+ * react to it. If the application does not close the open
+ * file descriptor, a SIGKILL is sent to enforce freeing the card's
+ * resources.
+ *
+ * I did not find a different way to prevent kernel problems due to
+ * reference counters for the cards character devices getting out of
+ * sync. The character device deallocation does not block, even if
+ * there is still an open file descriptor pending. If this pending
+ * descriptor is closed, the data structures used by the character
+ * device is reinstantiated, which will lead to the reference counter
+ * dropping below the allowed values.
+ *
+ * Card recovery
+ * -------------
+ *
+ * To test the internal driver recovery the following command can be used:
+ *   sudo sh -c 'echo 0xfffff > /sys/class/genwqe/genwqe0_card/err_inject'
+ */
+
+
+/**
+ * enum dma_mapping_type - Mapping type definition
+ *
+ * To avoid memcpying data around we use user memory directly. To do
+ * this we need to pin/swap-in the memory and request a DMA address
+ * for it.
+ */
+enum dma_mapping_type {
+	GENWQE_MAPPING_RAW = 0,		/* contiguous memory buffer */
+	GENWQE_MAPPING_SGL_TEMP,	/* sglist dynamically used */
+	GENWQE_MAPPING_SGL_PINNED,	/* sglist used with pinning */
+};
+
+/**
+ * struct dma_mapping - Information about memory mappings done by the driver
+ */
+struct dma_mapping {
+	enum dma_mapping_type type;
+
+	void *u_vaddr;			/* user-space vaddr/non-aligned */
+	void *k_vaddr;			/* kernel-space vaddr/non-aligned */
+	dma_addr_t dma_addr;		/* physical DMA address */
+
+	struct page **page_list;	/* list of pages used by user buff */
+	dma_addr_t *dma_list;		/* list of dma addresses per page */
+	unsigned int nr_pages;		/* number of pages */
+	unsigned int size;		/* size in bytes */
+
+	struct list_head card_list;	/* list of usr_maps for card */
+	struct list_head pin_list;	/* list of pinned memory for dev */
+};
+
+static inline void genwqe_mapping_init(struct dma_mapping *m,
+				       enum dma_mapping_type type)
+{
+	memset(m, 0, sizeof(*m));
+	m->type = type;
+}
+
+/**
+ * struct ddcb_queue - DDCB queue data
+ * @ddcb_max:          Number of DDCBs on the queue
+ * @ddcb_next:         Next free DDCB
+ * @ddcb_act:          Next DDCB supposed to finish
+ * @ddcb_seq:          Sequence number of last DDCB
+ * @ddcbs_in_flight:   Currently enqueued DDCBs
+ * @ddcbs_completed:   Number of already completed DDCBs
+ * @busy:              Number of -EBUSY returns
+ * @ddcb_daddr:        DMA address of first DDCB in the queue
+ * @ddcb_vaddr:        Kernel virtual address of first DDCB in the queue
+ * @ddcb_req:          Associated requests (one per DDCB)
+ * @ddcb_waitqs:       Associated wait queues (one per DDCB)
+ * @ddcb_lock:         Lock to protect queuing operations
+ * @ddcb_waitq:        Wait on next DDCB finishing
+ */
+
+struct ddcb_queue {
+	int ddcb_max;			/* amount of DDCBs  */
+	int ddcb_next;			/* next available DDCB num */
+	int ddcb_act;			/* DDCB to be processed */
+	u16 ddcb_seq;			/* slc seq num */
+	unsigned int ddcbs_in_flight;	/* number of ddcbs in processing */
+	unsigned int ddcbs_completed;
+	unsigned int ddcbs_max_in_flight;
+	unsigned int busy;		/* how many times -EBUSY? */
+
+	dma_addr_t ddcb_daddr;		/* DMA address */
+	struct ddcb *ddcb_vaddr;	/* kernel virtual addr for DDCBs */
+	struct ddcb_requ **ddcb_req;	/* ddcb processing parameter */
+	wait_queue_head_t *ddcb_waitqs; /* waitqueue per ddcb */
+
+	spinlock_t ddcb_lock;		/* exclusive access to queue */
+	wait_queue_head_t ddcb_waitq;	/* wait for ddcb processing */
+
+	/* registers or the respective queue to be used */
+	u32 IO_QUEUE_CONFIG;
+	u32 IO_QUEUE_STATUS;
+	u32 IO_QUEUE_SEGMENT;
+	u32 IO_QUEUE_INITSQN;
+	u32 IO_QUEUE_WRAP;
+	u32 IO_QUEUE_OFFSET;
+	u32 IO_QUEUE_WTIME;
+	u32 IO_QUEUE_ERRCNTS;
+	u32 IO_QUEUE_LRW;
+};
+
+/*
+ * GFIR, SLU_UNITCFG, APP_UNITCFG
+ *   8 Units with FIR/FEC + 64 * 2ndary FIRS/FEC.
+ */
+#define GENWQE_FFDC_REGS	(3 + (8 * (2 + 2 * 64)))
+
+struct genwqe_ffdc {
+	unsigned int entries;
+	struct genwqe_reg *regs;
+};
+
+/**
+ * struct genwqe_dev - GenWQE device information
+ * @card_state:       Card operation state, see above
+ * @ffdc:             First Failure Data Capture buffers for each unit
+ * @card_thread:      Working thread to operate the DDCB queue
+ * @queue_waitq:      Wait queue used in card_thread
+ * @queue:            DDCB queue
+ * @health_thread:    Card monitoring thread (only for PFs)
+ * @health_waitq:     Wait queue used in health_thread
+ * @pci_dev:          Associated PCI device (function)
+ * @mmio:             Base address of 64-bit register space
+ * @mmio_len:         Length of register area
+ * @file_lock:        Lock to protect access to file_list
+ * @file_list:        List of all processes with open GenWQE file descriptors
+ *
+ * This struct contains all information needed to communicate with a
+ * GenWQE card. It is initialized when a GenWQE device is found and
+ * destroyed when it goes away. It holds data to maintain the queue as
+ * well as data needed to feed the user interfaces.
+ */
+struct genwqe_dev {
+	enum genwqe_card_state card_state;
+	spinlock_t print_lock;
+
+	int card_idx;			/* card index 0..CARD_NO_MAX-1 */
+	u64 flags;			/* general flags */
+
+	/* FFDC data gathering */
+	struct genwqe_ffdc ffdc[GENWQE_DBG_UNITS];
+
+	/* DDCB workqueue */
+	struct task_struct *card_thread;
+	wait_queue_head_t queue_waitq;
+	struct ddcb_queue queue;	/* genwqe DDCB queue */
+	unsigned int irqs_processed;
+
+	/* Card health checking thread */
+	struct task_struct *health_thread;
+	wait_queue_head_t health_waitq;
+
+	/* char device */
+	dev_t  devnum_genwqe;		/* major/minor num card */
+	struct class *class_genwqe;	/* reference to class object */
+	struct device *dev;		/* for device creation */
+	struct cdev cdev_genwqe;	/* char device for card */
+
+	struct dentry *debugfs_root;	/* debugfs card root directory */
+	struct dentry *debugfs_genwqe;	/* debugfs driver root directory */
+
+	/* pci resources */
+	struct pci_dev *pci_dev;	/* PCI device */
+	void __iomem *mmio;		/* BAR-0 MMIO start */
+	unsigned long mmio_len;
+	u16 num_vfs;
+	u32 vf_jobtimeout_msec[GENWQE_MAX_VFS];
+	int is_privileged;		/* access to all regs possible */
+
+	/* config regs which we need often */
+	u64 slu_unitcfg;
+	u64 app_unitcfg;
+	u64 softreset;
+	u64 err_inject;
+	u64 last_gfir;
+	char app_name[5];
+
+	spinlock_t file_lock;		/* lock for open files */
+	struct list_head file_list;	/* list of open files */
+
+	/* debugfs parameters */
+	int ddcb_software_timeout;	/* wait until DDCB times out */
+	int skip_recovery;		/* circumvention if recovery fails */
+	int kill_timeout;		/* wait after sending SIGKILL */
+};
+
+/**
+ * enum genwqe_requ_state - State of a DDCB execution request
+ */
+enum genwqe_requ_state {
+	GENWQE_REQU_NEW      = 0,
+	GENWQE_REQU_ENQUEUED = 1,
+	GENWQE_REQU_TAPPED   = 2,
+	GENWQE_REQU_FINISHED = 3,
+	GENWQE_REQU_STATE_MAX,
+};
+
+/**
+ * struct ddcb_requ - Kernel internal representation of the DDCB request
+ * @cmd:          User space representation of the DDCB execution request
+ */
+struct ddcb_requ {
+	/* kernel specific content */
+	enum genwqe_requ_state req_state; /* request status */
+	int num;			  /* ddcb_no for this request */
+	struct ddcb_queue *queue;	  /* associated queue */
+
+	struct dma_mapping  dma_mappings[DDCB_FIXUPS];
+	struct sg_entry     *sgl[DDCB_FIXUPS];
+	dma_addr_t	    sgl_dma_addr[DDCB_FIXUPS];
+	size_t		    sgl_size[DDCB_FIXUPS];
+
+	/* kernel/user shared content */
+	struct genwqe_ddcb_cmd cmd;	/* user space view of the request */
+	struct genwqe_debug_data debug_data;
+};
+
+/**
+ * struct genwqe_file - Information for open GenWQE devices
+ */
+struct genwqe_file {
+	struct genwqe_dev *cd;
+	struct genwqe_driver *client;
+	struct file *filp;
+
+	struct fasync_struct *async_queue;
+	struct task_struct *owner;
+	struct list_head list;		/* entry in list of open files */
+
+	spinlock_t map_lock;		/* lock for dma_mappings */
+	struct list_head map_list;	/* list of dma_mappings */
+
+	spinlock_t pin_lock;		/* lock for pinned memory */
+	struct list_head pin_list;	/* list of pinned memory */
+};
+
+int  genwqe_setup_service_layer(struct genwqe_dev *cd); /* for PF only */
+int  genwqe_finish_queue(struct genwqe_dev *cd);
+int  genwqe_release_service_layer(struct genwqe_dev *cd);
+
+/**
+ * genwqe_get_slu_id() - Read Service Layer Unit Id
+ * Return: 0x00: Development code
+ *         0x01: SLC1 (old)
+ *         0x02: SLC2 (sept2012)
+ *         0x03: SLC2 (feb2013, generic driver)
+ */
+static inline int genwqe_get_slu_id(struct genwqe_dev *cd)
+{
+	return (int)((cd->slu_unitcfg >> 32) & 0xff);
+}
+
+int  genwqe_ddcbs_in_flight(struct genwqe_dev *cd);
+
+u8   genwqe_card_type(struct genwqe_dev *cd);
+int  genwqe_card_reset(struct genwqe_dev *cd);
+int  genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count);
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd);
+
+int  genwqe_device_create(struct genwqe_dev *cd);
+int  genwqe_device_remove(struct genwqe_dev *cd);
+
+/* debugfs */
+int  genwqe_init_debugfs(struct genwqe_dev *cd);
+void genqwe_exit_debugfs(struct genwqe_dev *cd);
+
+int  genwqe_read_softreset(struct genwqe_dev *cd);
+
+/* Hardware Circumventions */
+int  genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd);
+int  genwqe_flash_readback_fails(struct genwqe_dev *cd);
+
+/**
+ * genwqe_write_vreg() - Write register in VF window
+ * @cd:    genwqe device
+ * @reg:   register address
+ * @val:   value to write
+ * @func:  0: PF, 1: VF0, ..., 15: VF14
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func);
+
+/**
+ * genwqe_read_vreg() - Read register in VF window
+ * @cd:    genwqe device
+ * @reg:   register address
+ * @func:  0: PF, 1: VF0, ..., 15: VF14
+ *
+ * Return: content of the register
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func);
+
+/* FFDC Buffer Management */
+int  genwqe_ffdc_buff_size(struct genwqe_dev *cd, int unit_id);
+int  genwqe_ffdc_buff_read(struct genwqe_dev *cd, int unit_id,
+			   struct genwqe_reg *regs, unsigned int max_regs);
+int  genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+			   unsigned int max_regs, int all);
+int  genwqe_ffdc_dump_dma(struct genwqe_dev *cd,
+			  struct genwqe_reg *regs, unsigned int max_regs);
+
+int  genwqe_init_debug_data(struct genwqe_dev *cd,
+			    struct genwqe_debug_data *d);
+
+void genwqe_init_crc32(void);
+int  genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len);
+
+/* Memory allocation/deallocation; dma address handling */
+int  genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m,
+		      void *uaddr, unsigned long size,
+		      struct ddcb_requ *req);
+
+int  genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
+			struct ddcb_requ *req);
+
+struct sg_entry *genwqe_alloc_sgl(struct genwqe_dev *cd, int num_pages,
+				 dma_addr_t *dma_addr, size_t *sgl_size);
+
+void genwqe_free_sgl(struct genwqe_dev *cd, struct sg_entry *sg_list,
+		    dma_addr_t dma_addr, size_t size);
+
+int genwqe_setup_sgl(struct genwqe_dev *cd,
+		    unsigned long offs,
+		    unsigned long size,
+		    struct sg_entry *sgl, /* genwqe sgl */
+		    dma_addr_t dma_addr, size_t sgl_size,
+		    dma_addr_t *dma_list, int page_offs, int num_pages);
+
+int genwqe_check_sgl(struct genwqe_dev *cd, struct sg_entry *sg_list,
+		     int size);
+
+/* A mapping is in use when it exists and covers a non-zero size. */
+static inline bool dma_mapping_used(struct dma_mapping *m)
+{
+	/* short-circuit keeps the NULL case from touching m->size */
+	return (m != NULL) && (m->size != 0);
+}
+
+/**
+ * __genwqe_execute_ddcb() - Execute DDCB request with addr translation
+ *
+ * This function will do the address translation changes to the DDCBs
+ * according to the definitions required by the ATS field. It looks up
+ * the memory allocation buffer or does vmap/vunmap for the respective
+ * user-space buffers, inclusive page pinning and scatter gather list
+ * buildup and teardown.
+ */
+int  __genwqe_execute_ddcb(struct genwqe_dev *cd,
+			   struct genwqe_ddcb_cmd *cmd);
+
+/**
+ * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
+ *
+ * This version will not do address translation or any modification of
+ * the DDCB data. It is used e.g. for the MoveFlash DDCB which is
+ * entirely prepared by the driver itself. That means the appropriate
+ * DMA addresses are already in the DDCB and do not need any
+ * modification.
+ */
+int  __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			       struct genwqe_ddcb_cmd *cmd);
+
+int  __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+int  __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+int  __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+
+/* register access */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val);
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs);
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val);
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs);
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+				 dma_addr_t *dma_handle);
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+			      void *vaddr, dma_addr_t dma_handle);
+
+/* Base clock frequency in MHz */
+int  genwqe_base_clock_frequency(struct genwqe_dev *cd);
+
+/* Before FFDC is captured the traps should be stopped. */
+void genwqe_stop_traps(struct genwqe_dev *cd);
+void genwqe_start_traps(struct genwqe_dev *cd);
+
+/* Hardware circumvention */
+bool genwqe_need_err_masking(struct genwqe_dev *cd);
+
+/**
+ * genwqe_is_privileged() - Determine operation mode for PCI function
+ *
+ * On Intel with SRIOV support we see:
+ *   PF: is_physfn = 1 is_virtfn = 0
+ *   VF: is_physfn = 0 is_virtfn = 1
+ *
+ * On Systems with no SRIOV support _and_ virtualized systems we get:
+ *       is_physfn = 0 is_virtfn = 0
+ *
+ * Other vendors have individual pci device ids to distinguish between
+ * virtual function drivers and physical function drivers. GenWQE
+ * unfortunately has just one pci device id for both VFs and PF.
+ *
+ * The following code is used to distinguish if the card is running in
+ * privileged mode, either as true PF or in a virtualized system with
+ * full register access e.g. currently on PowerPC.
+ *
+ * if (pci_dev->is_virtfn)
+ *          cd->is_privileged = 0;
+ *  else
+ *          cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+ *				 != IO_ILLEGAL_VALUE);
+ */
+static inline int genwqe_is_privileged(struct genwqe_dev *cd)
+{
+	return cd->is_privileged;
+}
+
+#endif	/* __CARD_BASE_H__ */
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
new file mode 100644
index 000000000000..6f1acc0ccf88
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -0,0 +1,1376 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Device Driver Control Block (DDCB) queue support. Definition of
+ * interrupt handlers for queue support as well as triggering the
+ * health monitor code in case of problems. The current hardware uses
+ * an MSI interrupt which is shared between error handling and
+ * functional code.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/crc-itu-t.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/*
+ * N: next DDCB, this is where the next DDCB will be put.
+ * A: active DDCB, this is where the code will look for the next completion.
+ * x: DDCB is enqueued, we are waiting for its completion.
+ *
+ * Situation (1): Empty queue
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   |   |   |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *           A/N
+ *  enqueued_ddcbs = A - N = 2 - 2 = 0
+ *
+ * Situation (2): Queue not wrapped, N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   | x | x |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *            A       N
+ *  enqueued_ddcbs = N - A = 4 - 2 = 2
+ *
+ * Situation (3): Queue wrapped, A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x |   |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *            N       A
+ *  enqueued_ddcbs = queue_max  - (A - N) = 8 - (4 - 2) = 6
+ *
+ * Situation (4a): Queue full N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x | x | x | x | x |   |
+ *  +---+---+---+---+---+---+---+---+
+ *    A                           N
+ *
+ *  enqueued_ddcbs = N - A = 7 - 0 = 7
+ *
+ * Situation (4b): Queue full A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *                N   A
+ *  enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7
+ */
+
+static int queue_empty(struct ddcb_queue *queue)
+{
+	return queue->ddcb_act == queue->ddcb_next; /* A == N: nothing enqueued */
+}
+
+static int queue_enqueued_ddcbs(struct ddcb_queue *queue)
+{
+	int used = queue->ddcb_next - queue->ddcb_act;
+
+	/* next is behind act: the used range wraps around the ring end */
+	return (used >= 0) ? used : queue->ddcb_max + used;
+}
+
+static int queue_free_ddcbs(struct ddcb_queue *queue)
+{
+	/* one slot always stays unused, so a full queue never looks empty */
+	int avail = queue->ddcb_max - 1 - queue_enqueued_ddcbs(queue);
+
+	if (!WARN_ON_ONCE(avail < 0))
+		return avail;
+	return 0;	/* negative count: must never ever happen! */
+}
+
+/*
+ * Use of the PRIV field in the DDCB for queue debugging:
+ *
+ * (1) Trying to get rid of a DDCB which saw a timeout:
+ *     pddcb->priv[6] = 0xcc;   # cleared
+ *
+ * (2) Append a DDCB via NEXT bit:
+ *     pddcb->priv[7] = 0xaa;	# appended
+ *
+ * (3) DDCB needed tapping:
+ *     pddcb->priv[7] = 0xbb;   # tapped
+ *
+ * (4) DDCB marked as correctly finished:
+ *     pddcb->priv[6] = 0xff;	# finished
+ */
+
+static inline void ddcb_mark_tapped(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xbb;  /* debug tag: queue was started by tapping */
+}
+
+static inline void ddcb_mark_appended(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xaa;	/* debug tag: appended via NEXT bit */
+}
+
+static inline void ddcb_mark_cleared(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xcc; /* debug tag: request was purged (cleared) */
+}
+
+static inline void ddcb_mark_finished(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xff;	/* debug tag: completion was processed */
+}
+
+static inline void ddcb_mark_unused(struct ddcb *pddcb)
+{
+	pddcb->priv_64 = cpu_to_be64(0); /* reset PRIV: no debug tag set yet */
+}
+
+/**
+ * genwqe_crc16() - Generate 16-bit crc as required for DDCBs
+ * @buff:       pointer to data buffer
+ * @len:        length of data for calculation
+ * @init:       initial crc (0xffff at start)
+ *
+ * Polynomial = x^16 + x^12 + x^5 + 1   (0x1021)
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff
+ *          should result in a crc16 of 0x89c3
+ *
+ * Return: crc16 as returned by crc_itu_t(); callers do the BE conversion
+ */
+static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init)
+{
+	return crc_itu_t(init, buff, len);
+}
+
+static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	int idx;
+	unsigned long irq_flags;
+	struct device *dev = &cd->pci_dev->dev;
+
+	/* print_lock is held across the whole dump */
+	spin_lock_irqsave(&cd->print_lock, irq_flags);
+
+	dev_info(dev,
+		 "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n",
+		 cd->card_idx, queue->ddcb_act, queue->ddcb_next);
+
+	/* one line per DDCB slot, '>' marks the active one */
+	for (idx = 0; idx < queue->ddcb_max; idx++) {
+		struct ddcb *slot = &queue->ddcb_vaddr[idx];
+
+		dev_err(dev,
+			"  %c %-3d: RETC=%03x SEQ=%04x "
+			"HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n",
+			(idx == queue->ddcb_act) ? '>' : ' ', idx,
+			be16_to_cpu(slot->retc_16),
+			be16_to_cpu(slot->seqnum_16),
+			slot->hsi, slot->shi,
+			be64_to_cpu(slot->priv_64),
+			slot->cmd);
+	}
+
+	spin_unlock_irqrestore(&cd->print_lock, irq_flags);
+}
+
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
+{
+	struct ddcb_requ *requ = kzalloc(sizeof(*requ), GFP_ATOMIC);
+
+	/*
+	 * Callers only get the embedded command; ddcb_requ_free() maps
+	 * it back to the enclosing request via container_of().
+	 */
+	return requ ? &requ->cmd : NULL;
+}
+
+void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd)
+{
+	/* cmd is embedded in struct ddcb_requ; free the enclosing object */
+	kfree(container_of(cmd, struct ddcb_requ, cmd));
+}
+
+static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req)
+{
+	return req->req_state;	/* software copy of the request state */
+}
+
+static inline void ddcb_requ_set_state(struct ddcb_requ *req,
+				       enum genwqe_requ_state new_state)
+{
+	req->req_state = new_state;	/* plain store, no locking done here */
+}
+
+static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req)
+{
+	return !!req->cmd.ddata_addr; /* debug data wanted if address is set */
+}
+
+/**
+ * ddcb_requ_finished() - Tell if waiting for a DDCB request can end
+ * @cd:          pointer to genwqe device descriptor
+ * @req:         DDCB work request
+ *
+ * The request state is the software copy of the hardware state; it is
+ * updated by the interrupt/polling code. The lowlevel code should
+ * check the hardware state directly, the higher level code should
+ * check the copy, as done here.
+ *
+ * Also true if the card state is not GENWQE_CARD_USED. This enables
+ * us to purge all DDCBs in the shutdown case.
+ */
+static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED)
+		return 1;
+	return cd->card_state != GENWQE_CARD_USED;
+}
+
+/**
+ * enqueue_ddcb() - Enqueue a DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @queue:	queue this operation should be done on
+ * @pddcb:      pointer to the DDCB to be started
+ * @ddcb_no:    number of the DDCB being appended or tapped
+ *
+ * Start execution of DDCB by tapping or append to queue via NEXT
+ * bit. This is done by an atomic 'compare and swap' instruction and
+ * checking SHI and HSI of the previous DDCB.
+ * This function must only be called with ddcb_lock held.
+ *
+ * Return: 1 if new DDCB is appended to previous
+ *         2 if DDCB queue is tapped via register/simulation
+ */
+#define RET_DDCB_APPENDED 1
+#define RET_DDCB_TAPPED   2
+
+static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue,
+			struct ddcb *pddcb, int ddcb_no)
+{
+	unsigned int try;
+	int prev_no;
+	struct ddcb *prev_ddcb;
+	__be32 old, new, icrc_hsi_shi;
+	u64 num;
+
+	/*
+	 * For performance checks a Dispatch Timestamp can be put into
+	 * DDCB. It is supposed to use the SLU's free running counter,
+	 * but this requires PCIe cycles.
+	 */
+	ddcb_mark_unused(pddcb);
+
+	/* check previous DDCB if already fetched */
+	prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1;
+	prev_ddcb = &queue->ddcb_vaddr[prev_no];
+
+	/*
+	 * The cmpxchg() below fails if the previous DDCB changed in
+	 * between, e.g. because the HSI.FETCHED bit got set. Retry in
+	 * this case; at most 2 tries are expected to be needed.
+	 */
+	ddcb_mark_appended(pddcb);
+	for (try = 0; try < 2; try++) {
+		old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
+
+		/* try to append via NEXT bit if prev DDCB is not completed */
+		if ((old & DDCB_COMPLETED_BE32) != 0x00000000)
+			break;
+
+		new = (old | DDCB_NEXT_BE32);
+		icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new);
+
+		if (icrc_hsi_shi == old)
+			return RET_DDCB_APPENDED; /* appended to queue */
+	}
+
+	/* Queue must be re-started by updating QUEUE_OFFSET */
+	ddcb_mark_tapped(pddcb);
+	num = (u64)ddcb_no << 8;
+	__genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */
+
+	return RET_DDCB_TAPPED;
+}
+
+/**
+ * copy_ddcb_results() - Copy output state from real DDCB to request
+ * @req:        request whose DDCB (slot req->num) is copied
+ * @ddcb_no:    DDCB number, used to find the previous DDCB for debug data
+ *
+ * Copy DDCB ASV to request struct. There is no endian conversion made,
+ * since data structure in ASV is still unknown here.
+ * Status fields such as RETC are converted to CPU byte order.
+ *
+ * Used by __genwqe_purge_ddcb() and genwqe_check_ddcb_queue().
+ */
+static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no)
+{
+	int prev_no;
+	struct ddcb_queue *queue = req->queue;
+	struct genwqe_ddcb_cmd *cmd = &req->cmd;
+	struct ddcb *done = &queue->ddcb_vaddr[req->num];
+
+	memcpy(cmd->asv, done->asv, DDCB_ASV_LENGTH);	/* raw, no swap */
+
+	/* status of the variant part, converted to CPU byte order */
+	cmd->vcrc     = be16_to_cpu(done->vcrc_16);
+	cmd->deque_ts = be64_to_cpu(done->deque_ts_64);
+	cmd->cmplt_ts = be64_to_cpu(done->cmplt_ts_64);
+	cmd->attn     = be16_to_cpu(done->attn_16);
+	cmd->progress = be32_to_cpu(done->progress_32);
+	cmd->retc     = be16_to_cpu(done->retc_16);
+
+	if (!ddcb_requ_collect_debug_data(req))
+		return;
+
+	/* debug data: snapshot of this DDCB and of its ring predecessor */
+	prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1;
+	memcpy(&req->debug_data.ddcb_finished, done,
+	       sizeof(req->debug_data.ddcb_finished));
+	memcpy(&req->debug_data.ddcb_prev, &queue->ddcb_vaddr[prev_no],
+	       sizeof(req->debug_data.ddcb_prev));
+}
+
+/**
+ * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work requests.
+ * @cd:         pointer to genwqe device descriptor
+ * @queue:      DDCB queue to check; its ddcb_lock is taken in here
+ *
+ * Return: Number of DDCBs processed, i.e. finished or found purged
+ */
+static int genwqe_check_ddcb_queue(struct genwqe_dev *cd,
+				   struct ddcb_queue *queue)
+{
+	unsigned long flags;
+	int ddcbs_finished = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	/* FIXME avoid soft locking CPU */
+	while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) {
+
+		struct ddcb *pddcb;
+		struct ddcb_requ *req;
+		u16 vcrc, vcrc_16, retc_16;
+
+		pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+
+		if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) ==
+		    0x00000000)
+			goto go_home; /* not completed, continue waiting */
+
+		/* Note: DDCB could be purged */
+
+		req = queue->ddcb_req[queue->ddcb_act];
+		if (req == NULL) {
+			/* DDCB was purged, not an error: just move on */
+			goto pick_next_one;
+		}
+
+		/*
+		 * HSI=0x44 (fetched and completed), but RETC is
+		 * 0x101, or even worse 0x000.
+		 *
+		 * In case of seeing the queue in inconsistent state
+		 * we read the errcnts and the queue status to provide
+		 * a trigger for our PCIe analyzer to stop capturing.
+		 */
+		retc_16 = be16_to_cpu(pddcb->retc_16);
+		if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) {
+			u64 errcnts, status;
+			u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr;
+
+			errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS);
+			status  = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+			dev_err(&pci_dev->dev,
+				"[%s] SEQN=%04x HSI=%02x RETC=%03x "
+				" Q_ERRCNTS=%016llx Q_STATUS=%016llx\n"
+				" DDCB_DMA_ADDR=%016llx\n",
+				__func__, be16_to_cpu(pddcb->seqnum_16),
+				pddcb->hsi, retc_16, errcnts, status,
+				queue->ddcb_daddr + ddcb_offs);
+		}
+
+		copy_ddcb_results(req, queue->ddcb_act);
+		queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */
+
+		dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num);
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		ddcb_mark_finished(pddcb);
+
+		/* calculate CRC_16 to see if VCRC is correct */
+		vcrc = genwqe_crc16(pddcb->asv,
+				   VCRC_LENGTH(req->cmd.asv_length),
+				   0xffff);
+		vcrc_16 = be16_to_cpu(pddcb->vcrc_16);
+		if (vcrc != vcrc_16) {
+			printk_ratelimited(KERN_ERR
+				"%s %s: err: wrong VCRC pre=%02x vcrc_len=%d "
+				"bytes vcrc_data=%04x is not vcrc_card=%04x\n",
+				GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+				pddcb->pre, VCRC_LENGTH(req->cmd.asv_length),
+				vcrc, vcrc_16);
+		}
+
+		ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+		queue->ddcbs_completed++;
+		queue->ddcbs_in_flight--;
+
+		/* wake up process waiting for this DDCB */
+		wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+
+pick_next_one:
+		queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max;
+		ddcbs_finished++;
+	}
+
+ go_home:
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	return ddcbs_finished;
+}
+
+/**
+ * __genwqe_wait_ddcb(): Waits until DDCB is completed
+ * @cd:         pointer to genwqe device descriptor
+ * @req:        pointer to requested DDCB parameters
+ *
+ * The Service Layer will update the RETC in DDCB when processing is
+ * pending or done.
+ *
+ * Return: > 0 remaining jiffies, DDCB completed
+ *           0 timeout, but request was found finished afterwards
+ *           -ETIMEDOUT	when timeout
+ *           -ERESTARTSYS when ^C
+ *           -EINVAL on invalid @req or unknown error condition
+ *           -EIO when the card is not in state GENWQE_CARD_USED
+ *
+ * When an error is returned the caller needs to ensure that
+ * __genwqe_purge_ddcb() is being called to get the &req removed
+ * from the queue.
+ */
+int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	int rc;
+	unsigned int ddcb_no;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (req == NULL)
+		return -EINVAL;
+
+	queue = req->queue;
+	if (queue == NULL)
+		return -EINVAL;
+
+	ddcb_no = req->num;
+	if (ddcb_no >= queue->ddcb_max)
+		return -EINVAL;
+
+	rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
+				ddcb_requ_finished(cd, req),
+				genwqe_ddcb_software_timeout * HZ);
+
+	/*
+	 * We need to distinguish 3 cases here:
+	 *   1. rc == 0              timeout occurred
+	 *   2. rc == -ERESTARTSYS   signal received
+	 *   3. rc > 0               remaining jiffies condition is true
+	 */
+	if (rc == 0) {
+		struct ddcb_queue *queue = req->queue;
+		struct ddcb *pddcb;
+
+		/*
+		 * Timeout may be caused by long task switching time.
+		 * When timeout happens, check if the request has
+		 * meanwhile completed.
+		 */
+		genwqe_check_ddcb_queue(cd, req->queue);
+		if (ddcb_requ_finished(cd, req))
+			return rc;	/* rc is 0 here */
+
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n",
+			__func__, req->num, rc,	ddcb_requ_get_state(req),
+			req);
+		dev_err(&pci_dev->dev,
+			"[%s]      IO_QUEUE_STATUS=0x%016llx\n", __func__,
+			__genwqe_readq(cd, queue->IO_QUEUE_STATUS));
+
+		pddcb = &queue->ddcb_vaddr[req->num];
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		print_ddcb_info(cd, req->queue);
+		return -ETIMEDOUT;
+
+	} else if (rc == -ERESTARTSYS) {
+		return rc;
+		/*
+		 * EINTR:       Stops the application
+		 * ERESTARTSYS: Restartable systemcall; called again
+		 */
+	} else if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d unknown result (rc=%d) %d!\n",
+			__func__, req->num, rc, ddcb_requ_get_state(req));
+		return -EINVAL;
+	}
+
+	/* Severe error occurred. Driver is forced to stop operation */
+	if (cd->card_state != GENWQE_CARD_USED) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d forced to stop (rc=%d)\n",
+			__func__, req->num, rc);
+		return -EIO;
+	}
+	return rc;
+}
+
+/**
+ * get_next_ddcb() - Get next available DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @queue:      queue to take the DDCB from
+ * @num:        set to the number of the returned DDCB, untouched on NULL
+ *
+ * Important DDCB fields are cleared and PRE and SEQNUM are preset.
+ * This function must only be called when ddcb_lock is held.
+ *
+ * Return: NULL if no empty DDCB available otherwise ptr to next DDCB.
+ */
+static struct ddcb *get_next_ddcb(struct genwqe_dev *cd,
+				  struct ddcb_queue *queue,
+				  int *num)
+{
+	u64 *pu64;
+	struct ddcb *pddcb;
+
+	if (queue_free_ddcbs(queue) == 0) /* queue is  full */
+		return NULL;
+
+	pddcb = &queue->ddcb_vaddr[queue->ddcb_next]; /* find new ddcb */
+
+	/* if it is not completed, we are not allowed to use it */
+	if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000)
+		return NULL;
+
+	*num = queue->ddcb_next;	/* internal DDCB number */
+	queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max;
+
+	/* clear important DDCB fields */
+	pu64 = (u64 *)pddcb;
+	pu64[0] = 0ULL;		/* offs 0x00 (ICRC,HSI,SHI,...) */
+	pu64[1] = 0ULL;		/* offs 0x08 (ACFUNC,CMD...) */
+
+	/* destroy previous results in ASV */
+	pu64[0x80/8] = 0ULL;	/* offs 0x80 (ASV + 0) */
+	pu64[0x88/8] = 0ULL;	/* offs 0x88 (ASV + 0x08) */
+	pu64[0x90/8] = 0ULL;	/* offs 0x90 (ASV + 0x10) */
+	pu64[0x98/8] = 0ULL;	/* offs 0x98 (ASV + 0x18) */
+	pu64[0xd0/8] = 0ULL;	/* offs 0xd0 (RETC,ATTN...) */
+
+	pddcb->pre = DDCB_PRESET_PRE; /* 128 */
+	pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++);
+	return pddcb;
+}
+
+/**
+ * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue
+ * @cd:         genwqe device descriptor
+ * @req:        DDCB request
+ *
+ * This will fail when the request was already FETCHED. In this case
+ * we need to wait until it is finished. Else the DDCB can be
+ * reused. This function also ensures that the request data structure
+ * is removed from ddcb_req[].
+ *
+ * Do not forget to call this function when __genwqe_wait_ddcb() fails,
+ * such that the request gets really removed from ddcb_req[].
+ *
+ * Return: 0 success
+ *         -EFAULT if the software timeout is not set or if the DDCB was
+ *         neither purged nor completed within the software timeout
+ */
+int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	struct ddcb *pddcb = NULL;
+	unsigned int t;
+	unsigned long flags;
+	struct ddcb_queue *queue = req->queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u64 queue_status;
+	__be32 icrc_hsi_shi = 0x0000;
+	__be32 old, new;
+
+	if (genwqe_ddcb_software_timeout <= 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: software timeout is not set!\n", __func__);
+		return -EFAULT;
+	}
+
+	pddcb = &queue->ddcb_vaddr[req->num];
+
+	for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) {
+
+		spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+		/* Check if req was meanwhile finished */
+		if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED)
+			goto go_home;
+
+		/* try to set PURGE bit if FETCHED/COMPLETED are not set */
+		old = pddcb->icrc_hsi_shi_32;	/* read SHI/HSI in BE32 */
+		if ((old & DDCB_FETCHED_BE32) == 0x00000000) {
+
+			new = (old | DDCB_PURGE_BE32);
+			icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32,
+					       old, new);
+			if (icrc_hsi_shi == old)
+				goto finish_ddcb;
+		}
+
+		/* normal finish with HSI bit */
+		barrier();
+		icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+		if (icrc_hsi_shi & DDCB_COMPLETED_BE32)
+			goto finish_ddcb;
+
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+		/*
+		 * genwqe_check_ddcb_queue() will most likely discover
+		 * this DDCB to be finished at some point in time and
+		 * mark the req finished. NOTE(review): the copy below
+		 * runs without ddcb_lock held - confirm this is safe.
+		 */
+
+		copy_ddcb_results(req, req->num); /* for the failing case */
+		msleep(100); /* sleep for 1/10 second and try again */
+		continue;
+
+finish_ddcb:
+		copy_ddcb_results(req, req->num);
+		ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+		queue->ddcbs_in_flight--;
+		queue->ddcb_req[req->num] = NULL; /* delete from array */
+		ddcb_mark_cleared(pddcb);
+
+		/* Move active DDCB further; Nothing to do here anymore. */
+		/*
+		 * We need to ensure that there is at least one free
+		 * DDCB in the queue. To do that, we must update
+		 * ddcb_act only if the COMPLETED bit is set for the
+		 * DDCB we are working on else we treat that DDCB even
+		 * if we PURGED it as occupied (hardware is supposed
+		 * to set the COMPLETED bit yet!).
+		 */
+		icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+		if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) &&
+		    (queue->ddcb_act == req->num)) {
+			queue->ddcb_act = ((queue->ddcb_act + 1) %
+					   queue->ddcb_max);
+		}
+go_home:
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 0;
+	}
+
+	/*
+	 * If the card is dead and the queue is forced to stop, we
+	 * might see this in the queue status register.
+	 */
+	queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+	dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num);
+	genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+	dev_err(&pci_dev->dev,
+		"[%s] err: DDCB#%d not purged and not completed "
+		"after %d seconds QSTAT=%016llx!!\n",
+		__func__, req->num, genwqe_ddcb_software_timeout,
+		queue_status);
+
+	print_ddcb_info(cd, req->queue);
+
+	return -EFAULT;
+}
+
+int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d)
+{
+	/* Fill version string and unit configs; -EFAULT if @d is NULL */
+	if (!d) {
+		dev_err(&cd->pci_dev->dev,
+			"[%s] err: invalid memory for debug data!\n",
+			__func__);
+		return -EFAULT;
+	}
+
+	/* snprintf() bounds the copy to the size of driver_version */
+	snprintf(d->driver_version, sizeof(d->driver_version), "%s",
+		 DRV_VERS_STRING);
+
+	d->app_unitcfg = cd->app_unitcfg;
+	d->slu_unitcfg = cd->slu_unitcfg;
+	return 0;
+}
+
+/**
+ * __genwqe_enqueue_ddcb() - Enqueue a DDCB
+ * @cd:          pointer to genwqe device descriptor
+ * @req:         pointer to DDCB execution request
+ *
+ * Return: 0 if enqueuing succeeded
+ *         -EIO if card is unusable/PCIe problems
+ *         -EBUSY if enqueuing failed
+ *         -EFAULT if the picked DDCB still has a request attached
+ */
+int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	struct ddcb *pddcb;
+	unsigned long flags;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u16 icrc;
+
+	if (cd->card_state != GENWQE_CARD_USED) {
+		printk_ratelimited(KERN_ERR
+			"%s %s: [%s] Card is unusable/PCIe problem Req#%d\n",
+			GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+			__func__, req->num);
+		return -EIO;
+	}
+
+	queue = req->queue = &cd->queue;
+
+	/* FIXME circumvention to improve performance when no irq is
+	 * there.
+	 */
+	if (genwqe_polling_enabled)
+		genwqe_check_ddcb_queue(cd, queue);
+
+	/*
+	 * It must be ensured to process all DDCBs in successive
+	 * order. Use a lock here in order to prevent nested DDCB
+	 * enqueuing.
+	 */
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	pddcb = get_next_ddcb(cd, queue, &req->num);	/* get ptr and num */
+	if (pddcb == NULL) {
+		queue->busy++;	/* statistics, count while ddcb_lock is held */
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return -EBUSY;
+	}
+
+	if (queue->ddcb_req[req->num] != NULL) {
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+		dev_err(&pci_dev->dev,
+			"[%s] picked DDCB %d with req=%p still in use!!\n",
+			__func__, req->num, req);
+		return -EFAULT;
+	}
+	ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED);
+	queue->ddcb_req[req->num] = req;
+
+	pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts);
+	pddcb->cmd = req->cmd.cmd;
+	pddcb->acfunc = req->cmd.acfunc;	/* functional unit */
+
+	/*
+	 * We know that we can get retc 0x104 with CRC error, do not
+	 * stop the queue in those cases for this command. XDIR = 1
+	 * does not work for old SLU versions.
+	 *
+	 * Last bitstream with the old XDIR behavior had SLU_ID
+	 * 0x34199.
+	 */
+	if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull)
+		pddcb->xdir = 0x1;
+	else
+		pddcb->xdir = 0x0;
+
+	pddcb->psp = (((req->cmd.asiv_length / 8) << 4) |
+		      ((req->cmd.asv_length  / 8)));
+	pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts);
+
+	/*
+	 * If copying the whole DDCB_ASIV_LENGTH is impacting
+	 * performance we need to change it to
+	 * req->cmd.asiv_length. But simulation benefits from some
+	 * non-architectured bits behind the architectured content.
+	 *
+	 * How much data is copied depends on the availability of the
+	 * ATS field, which was introduced late. If the ATS field is
+	 * supported ASIV is 8 bytes shorter than it used to be. Since
+	 * the ATS field is copied too, the code should do exactly
+	 * what it did before, but I wanted to make copying of the ATS
+	 * field very explicit.
+	 */
+	if (genwqe_get_slu_id(cd) <= 0x2) {
+		memcpy(&pddcb->__asiv[0],	/* destination */
+		       &req->cmd.__asiv[0],	/* source */
+		       DDCB_ASIV_LENGTH);	/* req->cmd.asiv_length */
+	} else {
+		pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats);
+		memcpy(&pddcb->n.asiv[0],	/* destination */
+			&req->cmd.asiv[0],	/* source */
+			DDCB_ASIV_LENGTH_ATS);	/* req->cmd.asiv_length */
+	}
+
+	pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */
+
+	/*
+	 * Calculate CRC_16 for corresponding range PSP(7:4). Include
+	 * empty 4 bytes prior to the data.
+	 */
+	icrc = genwqe_crc16((const u8 *)pddcb,
+			   ICRC_LENGTH(req->cmd.asiv_length), 0xffff);
+	pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16);
+
+	/* enable DDCB completion irq */
+	if (!genwqe_polling_enabled)
+		pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32;
+
+	dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num);
+	genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		/* use the kernel copy of debug data. copying back to
+		   user buffer happens later */
+
+		genwqe_init_debug_data(cd, &req->debug_data);
+		memcpy(&req->debug_data.ddcb_before, pddcb,
+		       sizeof(req->debug_data.ddcb_before));
+	}
+
+	enqueue_ddcb(cd, queue, pddcb, req->num);
+	queue->ddcbs_in_flight++;
+
+	if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight)
+		queue->ddcbs_max_in_flight = queue->ddcbs_in_flight;
+
+	ddcb_requ_set_state(req, GENWQE_REQU_TAPPED);
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	wake_up_interruptible(&cd->queue_waitq);
+
+	return 0;
+}
+
+/**
+ * __genwqe_execute_raw_ddcb() - Setup and execute DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @cmd:        user provided DDCB command, embedded in a struct ddcb_requ
+ *
+ * Return: >= 0 if the DDCB completed with DDCB_RETC_COMPLETE, else < 0
+ */
+int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			     struct genwqe_ddcb_cmd *cmd)
+{
+	int rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+	if (cmd->asiv_length > DDCB_ASIV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n",
+			__func__, cmd->asiv_length);
+		return -EINVAL;
+	}
+	if (cmd->asv_length > DDCB_ASV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n",
+			__func__, cmd->asv_length);
+		return -EINVAL;
+	}
+	rc = __genwqe_enqueue_ddcb(cd, req);
+	if (rc != 0)
+		return rc;
+
+	rc = __genwqe_wait_ddcb(cd, req);
+	if (rc < 0)		/* error or signal interrupt */
+		goto err_exit;
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+
+	/*
+	 * Higher values than 0x102 indicate completion with faults,
+	 * lower values than 0x102 indicate processing faults. Note
+	 * that DDCB might have been purged. E.g. Cntl+C. This might
+	 * happen e.g. on flash read, and needs to be handled by the
+	 * upper layer code.
+	 */
+	if (cmd->retc != DDCB_RETC_COMPLETE)
+		rc = -EBADMSG;	/* not processed/error retc */
+
+	return rc;
+
+ err_exit:
+	__genwqe_purge_ddcb(cd, req);	/* remove req from the queue */
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * We use this as condition for our wait-queue code.
+ *
+ * Return: 1 if the queue is not empty and the active DDCB has its
+ *         COMPLETED bit set, 0 otherwise
+ */
+static int genwqe_next_ddcb_ready(struct genwqe_dev *cd)
+{
+	int ready = 0;
+	unsigned long irq_flags;
+	struct ddcb_queue *q = &cd->queue;
+
+	spin_lock_irqsave(&q->ddcb_lock, irq_flags);
+
+	if (!queue_empty(q)) {
+		struct ddcb *act = &q->ddcb_vaddr[q->ddcb_act];
+
+		if (act->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32)
+			ready = 1;
+	}
+	spin_unlock_irqrestore(&q->ddcb_lock, irq_flags);
+
+	return ready;
+}
+
+/**
+ * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * Keep track on the number of DDCBs which are currently in the
+ * queue. This is needed for statistics as well as condition if we want
+ * to wait or better do polling in case of no interrupts available.
+ */
+int genwqe_ddcbs_in_flight(struct genwqe_dev *cd)
+{
+	int count;
+	unsigned long irq_flags;
+	struct ddcb_queue *q = &cd->queue;
+
+	spin_lock_irqsave(&q->ddcb_lock, irq_flags);
+	count = q->ddcbs_in_flight;
+	spin_unlock_irqrestore(&q->ddcb_lock, irq_flags);
+	return count;
+}
+
+/* Allocate DDCB ring, request and waitqueue arrays; program queue regs */
+static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	int rc, i;
+	struct ddcb *pddcb;
+	u64 val64;
+	unsigned int queue_size;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (genwqe_ddcb_max < 2)
+		return -EINVAL;
+
+	queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+
+	queue->ddcbs_in_flight = 0;  /* statistics */
+	queue->ddcbs_max_in_flight = 0;
+	queue->ddcbs_completed = 0;
+	queue->busy = 0;
+
+	queue->ddcb_seq	  = 0x100; /* start sequence number */
+	queue->ddcb_max	  = genwqe_ddcb_max; /* module parameter */
+	queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size,
+						&queue->ddcb_daddr);
+	if (queue->ddcb_vaddr == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] **err: could not allocate DDCB **\n", __func__);
+		return -ENOMEM;
+	}
+	memset(queue->ddcb_vaddr, 0, queue_size);
+
+	queue->ddcb_req = kcalloc(queue->ddcb_max, sizeof(struct ddcb_requ *),
+				  GFP_KERNEL);
+	if (!queue->ddcb_req) {
+		rc = -ENOMEM;
+		goto free_ddcbs;
+	}
+
+	queue->ddcb_waitqs = kcalloc(queue->ddcb_max,
+				     sizeof(wait_queue_head_t), GFP_KERNEL);
+	if (!queue->ddcb_waitqs) {
+		rc = -ENOMEM;
+		goto free_requs;
+	}
+
+	for (i = 0; i < queue->ddcb_max; i++) {
+		pddcb = &queue->ddcb_vaddr[i];		     /* DDCBs */
+		pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32;
+		pddcb->retc_16 = cpu_to_be16(0xfff);
+
+		queue->ddcb_req[i] = NULL;		     /* requests */
+		init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */
+	}
+
+	queue->ddcb_act  = 0;
+	queue->ddcb_next = 0;	/* queue is empty */
+
+	spin_lock_init(&queue->ddcb_lock);
+	init_waitqueue_head(&queue->ddcb_waitq);
+
+	val64 = ((u64)(queue->ddcb_max - 1) <<  8); /* lastptr */
+	__genwqe_writeq(cd, queue->IO_QUEUE_CONFIG,  0x07);  /* iCRC/vCRC */
+	__genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr);
+	__genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq);
+	__genwqe_writeq(cd, queue->IO_QUEUE_WRAP,    val64);
+	return 0;
+
+ free_requs:
+	kfree(queue->ddcb_req);
+	queue->ddcb_req = NULL;
+ free_ddcbs:
+	__genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+				queue->ddcb_daddr);
+	queue->ddcb_vaddr = NULL;
+	queue->ddcb_daddr = 0ull;
+	return rc;	/* -ENOMEM from the failed allocation */
+}
+
+static int ddcb_queue_initialized(struct ddcb_queue *queue)
+{
+	return queue->ddcb_vaddr != NULL; /* reset to NULL by free_ddcb_queue() */
+}
+
+static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	unsigned int queue_size;
+
+	queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+	kfree(queue->ddcb_waitqs);	/* allocated in setup_ddcb_queue() */
+	queue->ddcb_waitqs = NULL;
+	kfree(queue->ddcb_req);		/* kfree(NULL) is a no-op */
+	queue->ddcb_req = NULL;
+	if (queue->ddcb_vaddr) {
+		__genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+					queue->ddcb_daddr);
+		queue->ddcb_vaddr = NULL;
+		queue->ddcb_daddr = 0ull;
+	}
+}
+
+static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
+{
+	struct genwqe_dev *cd = dev_id;
+	u64 fir;
+
+	/*
+	 * Count the interrupt and wake the queue_waitq sleepers first.
+	 * In case of a fatal FIR error the queue is stopped, so it can
+	 * be checked without risking anything.
+	 */
+	cd->irqs_processed++;
+	wake_up_interruptible(&cd->queue_waitq);
+
+	/*
+	 * Looking for errors before kicking the queue might be safer,
+	 * but would be slower for the good case.
+	 */
+	fir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (!(fir & GFIR_ERR_TRIGGER))
+		return IRQ_HANDLED;
+
+	/* An error trigger bit is set: wake the health_waitq sleepers. */
+	wake_up_interruptible(&cd->health_waitq);
+
+	/*
+	 * GFIRs cause recovery actions by default; this rate-limited
+	 * message is just for debug when recovery is masked.
+	 */
+	printk_ratelimited(KERN_ERR "%s %s: [%s] GFIR=%016llx\n",
+			   GENWQE_DEVNAME, dev_name(&cd->pci_dev->dev),
+			   __func__, fir);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t genwqe_vf_isr(int irq, void *dev_id)
+{
+	struct genwqe_dev *card = dev_id;	/* cookie given to request_irq() */
+
+	card->irqs_processed += 1;
+	wake_up_interruptible(&card->queue_waitq);	/* kick queue waiters */
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * genwqe_card_thread() - Work thread for the DDCB queue
+ * @data:       genwqe device descriptor, as handed to kthread_run()
+ *
+ * Loops until kthread_should_stop(): checks the DDCB queue, then
+ * sleeps on queue_waitq for at most 1 jiffy (polling enabled) or HZ
+ * jiffies, then calls cond_resched(). Return: always 0
+ */
+static int genwqe_card_thread(void *data)
+{
+	int should_stop = 0, rc = 0;	/* rc is assigned but never read */
+	struct genwqe_dev *cd = (struct genwqe_dev *)data;
+
+	while (!kthread_should_stop()) {
+
+		genwqe_check_ddcb_queue(cd, &cd->queue);
+
+		if (genwqe_polling_enabled) {
+			rc = wait_event_interruptible_timeout(
+				cd->queue_waitq,
+				genwqe_ddcbs_in_flight(cd) ||
+				(should_stop = kthread_should_stop()), 1);
+		} else {
+			rc = wait_event_interruptible_timeout(
+				cd->queue_waitq,
+				genwqe_next_ddcb_ready(cd) ||
+				(should_stop = kthread_should_stop()), HZ);
+		}
+		if (should_stop)	/* latched inside the wait condition */
+			break;
+
+		/*
+		 * Avoid soft lockups on heavy loads; we do not want
+		 * to disable our interrupts.
+		 */
+		cond_resched();
+	}
+	return 0;
+}
+
+/**
+ * genwqe_setup_service_layer() - Setup DDCB queue
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * Reset card (privileged only), set up DDCB queue, start card thread,
+ * get irq. Queue, thread and irq capability are undone on failure.
+ * Return: 0 success, negative errno otherwise
+ */
+int genwqe_setup_service_layer(struct genwqe_dev *cd)
+{
+	int rc;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (genwqe_is_privileged(cd)) {
+		rc = genwqe_card_reset(cd);
+		if (rc < 0) {
+			dev_err(&pci_dev->dev,
+				"[%s] err: reset failed.\n", __func__);
+			return rc;
+		}
+		genwqe_read_softreset(cd);
+	}
+
+	queue = &cd->queue;	/* record the SLC register offsets in it */
+	queue->IO_QUEUE_CONFIG  = IO_SLC_QUEUE_CONFIG;
+	queue->IO_QUEUE_STATUS  = IO_SLC_QUEUE_STATUS;
+	queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT;
+	queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN;
+	queue->IO_QUEUE_OFFSET  = IO_SLC_QUEUE_OFFSET;
+	queue->IO_QUEUE_WRAP    = IO_SLC_QUEUE_WRAP;
+	queue->IO_QUEUE_WTIME   = IO_SLC_QUEUE_WTIME;
+	queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS;
+	queue->IO_QUEUE_LRW     = IO_SLC_QUEUE_LRW;
+
+	rc = setup_ddcb_queue(cd, queue);
+	if (rc != 0) {
+		rc = -ENODEV;	/* the original error code is replaced */
+		goto err_out;
+	}
+
+	init_waitqueue_head(&cd->queue_waitq);	/* the thread sleeps on it */
+	cd->card_thread = kthread_run(genwqe_card_thread, cd,
+				      GENWQE_DEVNAME "%d_thread",
+				      cd->card_idx);
+	if (IS_ERR(cd->card_thread)) {
+		rc = PTR_ERR(cd->card_thread);
+		cd->card_thread = NULL;
+		goto stop_free_queue;
+	}
+
+	rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS);
+	if (rc > 0)	/* positive rc: retry once with that count */
+		rc = genwqe_set_interrupt_capability(cd, rc);
+	if (rc != 0) {
+		rc = -ENODEV;
+		goto stop_kthread;
+	}
+
+	/*
+	 * We must have all wait-queues initialized when we enable the
+	 * interrupts. Otherwise we might crash if we get an early
+	 * irq.
+	 */
+	init_waitqueue_head(&cd->health_waitq);
+
+	if (genwqe_is_privileged(cd)) {
+		rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	} else {
+		rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	}
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq);
+		goto stop_irq_cap;
+	}
+
+	cd->card_state = GENWQE_CARD_USED;
+	return 0;
+
+ stop_irq_cap:	/* unwind in reverse order of the setup above */
+	genwqe_reset_interrupt_capability(cd);
+ stop_kthread:
+	kthread_stop(cd->card_thread);
+	cd->card_thread = NULL;
+ stop_free_queue:
+	free_ddcb_queue(cd, queue);
+ err_out:
+	return rc;
+}
+
+/**
+ * queue_wake_up_all() - Handles fatal error case
+ * @cd:         genwqe device descriptor
+ *
+ * PCI device unusable: wake the waiter of every DDCB slot. The caller
+ * must then purge the DDCBs and free all mappings. Return: always 0.
+ */
+static int queue_wake_up_all(struct genwqe_dev *cd)
+{
+	unsigned int i;
+	unsigned long flags;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	/* Index by i: using ddcb_act here woke one slot ddcb_max times */
+	for (i = 0; i < queue->ddcb_max; i++)
+		wake_up_interruptible(&queue->ddcb_waitqs[i]);
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+	return 0;
+}
+
+/**
+ * genwqe_finish_queue() - Wait until the DDCB queue has drained
+ * @cd:         genwqe device descriptor
+ *
+ * Pre-condition: no users of the card device are left, e.g. with open
+ * file-descriptors. Must be robust enough to be called twice.
+ * Return: 0 if the queue is empty or was never set up, -EIO on timeout
+ */
+int genwqe_finish_queue(struct genwqe_dev *cd)
+{
+	int i, rc = 0, in_flight;	/* rc was returned uninitialized */
+	int waitmax = genwqe_ddcb_software_timeout;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_queue *queue = &cd->queue;
+
+	if (!ddcb_queue_initialized(queue))
+		return 0;
+
+	/* Do not wipe out the error state. */
+	if (cd->card_state == GENWQE_CARD_USED)
+		cd->card_state = GENWQE_CARD_UNUSED;
+
+	/* Wake up all requests in the DDCB queue such that they
+	   should be removed nicely. */
+	queue_wake_up_all(cd);
+
+	/* We must wait to get rid of the DDCBs in flight */
+	for (i = 0; i < waitmax; i++) {
+		in_flight = genwqe_ddcbs_in_flight(cd);
+
+		if (in_flight == 0)
+			break;
+
+		dev_dbg(&pci_dev->dev,
+			"  DEBUG [%d/%d] waiting for queue to get empty: "
+			"%d requests!\n", i, waitmax, in_flight);
+
+		/*
+		 * Severe error situation: The card itself has
+		 * 16 DDCB queues, each queue has e.g. 32 entries,
+		 * each DDCB has a hardware timeout of currently 250
+		 * msec but the PFs have a hardware timeout of 8 sec
+		 * ... so I take something large.
+		 */
+		msleep(1000);
+	}
+	if (i == waitmax) {	/* all waitmax one-second polls used up */
+		dev_err(&pci_dev->dev, "  [%s] err: queue is not empty!!\n",
+			__func__);
+		rc = -EIO;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_release_service_layer() - Shutdown DDCB queue
+ * @cd:       genwqe device descriptor
+ *
+ * Safe to call twice: a queue that is not set up is left untouched.
+ * Return: 0 after teardown, 1 if there was nothing to tear down
+ */
+int genwqe_release_service_layer(struct genwqe_dev *cd)
+{
+	struct ddcb_queue *q = &cd->queue;
+
+	if (!ddcb_queue_initialized(q))
+		return 1;
+
+	free_irq(cd->pci_dev->irq, cd);
+	genwqe_reset_interrupt_capability(cd);
+	if (cd->card_thread) {
+		kthread_stop(cd->card_thread);
+		cd->card_thread = NULL;
+	}
+
+	free_ddcb_queue(cd, q);
+	return 0;
+}
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h
new file mode 100644
index 000000000000..c4f26720753e
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.h
@@ -0,0 +1,188 @@
+#ifndef __CARD_DDCB_H__
+#define __CARD_DDCB_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+
+/**
+ * struct ddcb - Device Driver Control Block DDCB
+ * @hsi:        Hardware software interlock
+ * @shi:        Software hardware interlock. Hsi and shi are used to interlock
+ *              software and hardware activities. We are using a compare and
+ *              swap operation to ensure that there are no races when
+ *              activating new DDCBs on the queue, or when we need to
+ *              purge a DDCB from a running queue.
+ * @acfunc:     Accelerator function addresses a unit within the chip
+ * @cmd:        Command to work on
+ * @cmdopts_16: Options for the command
+ * @asiv:       Input data
+ * @asv:        Output data
+ *
+ * The DDCB data format is big endian. Multiple consecutive DDCBs form
+ * a DDCB queue.
+ */
+#define ASIV_LENGTH		104 /* Old specification without ATS field */
+#define ASIV_LENGTH_ATS		96  /* New specification with ATS field */
+#define ASV_LENGTH		64
+
+struct ddcb {
+	union {
+		__be32 icrc_hsi_shi_32;	/* iCRC, Hardware/SW interlock */
+		struct {
+			__be16	icrc_16;
+			u8	hsi;
+			u8	shi;
+		};
+	};
+	u8  pre;		/* Preamble */
+	u8  xdir;		/* Execution Directives */
+	__be16 seqnum_16;	/* Sequence Number */
+
+	u8  acfunc;		/* Accelerator Function */
+	u8  cmd;		/* Command */
+	__be16 cmdopts_16;	/* Command Options */
+	u8  sur;		/* Status Update Rate */
+	u8  psp;		/* Protection Section Pointer */
+	__be16 rsvd_0e_16;	/* Reserved invariant */
+
+	__be64 fwiv_64;		/* Firmware Invariant */
+
+	union {
+		struct {
+			__be64 ats_64;  /* Address Translation Spec */
+			u8     asiv[ASIV_LENGTH_ATS]; /* New ASIV */
+		} n;
+		u8  __asiv[ASIV_LENGTH];	/* obsolete, overlays ats_64 + asiv */
+	};
+	u8     asv[ASV_LENGTH];	/* Appl Spec Variant */
+
+	__be16 rsvd_c0_16;	/* Reserved Variant */
+	__be16 vcrc_16;		/* Variant CRC */
+	__be32 rsvd_32;		/* Reserved unprotected */
+
+	__be64 deque_ts_64;	/* Dequeue Time Stamp */
+
+	__be16 retc_16;		/* Return Code */
+	__be16 attn_16;		/* Attention/Extended Error Codes */
+	__be32 progress_32;	/* Progress indicator */
+
+	__be64 cmplt_ts_64;	/* Completion Time Stamp */
+
+	/* The following layout matches the new service layer format */
+	__be32 ibdc_32;		/* Inbound Data Count  (* 256) */
+	__be32 obdc_32;		/* Outbound Data Count (* 256) */
+
+	__be64 rsvd_SLH_64;	/* Reserved for hardware */
+	union {			/* private data for driver */
+		u8	priv[8];
+		__be64	priv_64;
+	};
+	__be64 disp_ts_64;	/* Dispatch TimeStamp */
+} __attribute__((__packed__));
+
+/* CRC polynomial for DDCB */
+#define CRC16_POLYNOMIAL	0x1021
+
+/*
+ * SHI: Software to Hardware Interlock
+ *   This 1 byte field is written by software to interlock the
+ *   movement of one queue entry to another with the hardware in the
+ *   chip.
+ */
+#define DDCB_SHI_INTR		0x04 /* Bit 2 */
+#define DDCB_SHI_PURGE		0x02 /* Bit 1 */
+#define DDCB_SHI_NEXT		0x01 /* Bit 0 */
+
+/*
+ * HSI: Hardware to Software interlock
+ * This 1 byte field is written by hardware to interlock the movement
+ * of one queue entry to another with the software.
+ */
+#define DDCB_HSI_COMPLETED	0x40 /* Bit 6 */
+#define DDCB_HSI_FETCHED	0x04 /* Bit 2 */
+
+/*
+ * Accessing HSI/SHI is done 32-bit wide
+ *   Normally 16-bit access would work too, but on some platforms the
+ *   16-bit compare and swap operation is not supported. Therefore
+ *   switching to 32-bit such that those platforms will work too.
+ *
+ *                                         iCRC HSI/SHI
+ */
+#define DDCB_INTR_BE32		cpu_to_be32(0x00000004)
+#define DDCB_PURGE_BE32		cpu_to_be32(0x00000002)
+#define DDCB_NEXT_BE32		cpu_to_be32(0x00000001)
+#define DDCB_COMPLETED_BE32	cpu_to_be32(0x00004000)
+#define DDCB_FETCHED_BE32	cpu_to_be32(0x00000400)
+
+/* Definitions of DDCB presets */
+#define DDCB_PRESET_PRE		0x80
+#define ICRC_LENGTH(n)		((n) + 8 + 8 + 8)  /* used ASIV + hdr fields */
+#define VCRC_LENGTH(n)		((n))		   /* used ASV */
+
+/*
+ * Genwqe Scatter Gather list
+ *   Each element has up to 8 entries.
+ *   The chaining element is element 0 because of prefetching needs.
+ */
+
+/*
+ * 0b0110 Chained descriptor. The descriptor is describing the next
+ * descriptor list.
+ */
+#define SG_CHAINED		(0x6)
+
+/*
+ * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty
+ * condition.
+ */
+#define SG_DATA			(0x2)
+
+/*
+ * 0b0000 Early terminator. This is the last entry on the list
+ * regardless of the length indicated.
+ */
+#define SG_END_LIST		(0x0)
+
+/**
+ * struct sg_entry - Scatter gather list entry
+ * @target_addr:       Either a dma addr of memory to work on or a
+ *                     dma addr of a subsequent sglist block.
+ * @len:               Length of the data block.
+ * @flags:             See above.
+ *
+ * Depending on the command the GenWQE card can use a scatter gather
+ * list to describe the memory it works on. Always 8 sg_entry's form
+ * a block.
+ */
+struct sg_entry {
+	__be64 target_addr;
+	__be32 len;
+	__be32 flags;
+};
+
+#endif /* __CARD_DDCB_H__ */
diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c
new file mode 100644
index 000000000000..3bfdc07a7248
--- /dev/null
+++ b/drivers/misc/genwqe/card_debugfs.c
@@ -0,0 +1,500 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Debugfs interfaces for the GenWQE card. Help to debug potential
+ * problems. Dump internal chip state for debugging and failure
+ * determination.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+#define GENWQE_DEBUGFS_RO(_name, _showfn)				\
+	static int genwqe_debugfs_##_name##_open(struct inode *inode,	\
+						 struct file *file)	\
+	{ /* i_private: presumably the data given at file creation */	\
+		return single_open(file, _showfn, inode->i_private);	\
+	}								\
+	static const struct file_operations genwqe_##_name##_fops = {	\
+		.open = genwqe_debugfs_##_name##_open,			\
+		.read = seq_read,					\
+		.llseek = seq_lseek,					\
+		.release = single_release,				\
+	}
+
+static void dbg_uidn_show(struct seq_file *s, struct genwqe_reg *regs,
+			  int entries)
+{
+	unsigned int n;
+
+	/* One line per record; the 64-bit value is printed as two halves */
+	for (n = 0; n < entries; n++) {
+		u32 hi = (regs[n].val >> 32) & 0xffffffff;
+		u32 lo = regs[n].val & 0xffffffff;
+
+		seq_printf(s, "  0x%08x 0x%08x 0x%08x 0x%08x EXT_ERR_REC\n",
+			   regs[n].addr, regs[n].idx, hi, lo);
+	}
+}
+
+static int curr_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+	struct genwqe_dev *cd = s->private;
+	struct genwqe_reg *buf;
+	int count = genwqe_ffdc_buff_size(cd, uid);
+
+	/* Negative size is an error, zero means nothing to show */
+	if (count <= 0)
+		return count ? -EINVAL : 0;
+
+	buf = kcalloc(count, sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Traps stay halted only for the duration of the read-out */
+	genwqe_stop_traps(cd);
+	genwqe_ffdc_buff_read(cd, uid, buf, count);
+	genwqe_start_traps(cd);
+
+	/* Print the snapshot, then drop the temporary buffer */
+	dbg_uidn_show(s, buf, count);
+	kfree(buf);
+
+	return 0;
+}
+
+static int genwqe_curr_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 0);	/* fresh dump, uid 0 */
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid0, genwqe_curr_dbg_uid0_show);
+
+static int genwqe_curr_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 1);	/* fresh dump, uid 1 */
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid1, genwqe_curr_dbg_uid1_show);
+
+static int genwqe_curr_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 2);	/* fresh dump, uid 2 */
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid2, genwqe_curr_dbg_uid2_show);
+
+static int prev_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+	struct genwqe_dev *cd = s->private;	/* set via single_open() */
+
+	dbg_uidn_show(s, cd->ffdc[uid].regs,  cd->ffdc[uid].entries);
+	return 0;	/* only prints the records kept in cd->ffdc[uid] */
+}
+
+static int genwqe_prev_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 0);	/* stored dump, uid 0 */
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid0, genwqe_prev_dbg_uid0_show);
+
+static int genwqe_prev_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 1);	/* stored dump, uid 1 */
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid1, genwqe_prev_dbg_uid1_show);
+
+static int genwqe_prev_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 2);	/* stored dump, uid 2 */
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid2, genwqe_prev_dbg_uid2_show);
+
+/* Print the non-zero FFDC registers, read freshly with traps halted. */
+static int genwqe_curr_regs_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct genwqe_reg *regs;
+
+	regs = kcalloc(GENWQE_FFDC_REGS, sizeof(*regs), GFP_KERNEL);
+	if (regs == NULL)
+		return -ENOMEM;
+
+	genwqe_stop_traps(cd);
+	genwqe_read_ffdc_regs(cd, regs, GENWQE_FFDC_REGS, 1);
+	genwqe_start_traps(cd);
+
+	for (i = 0; i < GENWQE_FFDC_REGS; i++) {
+		if (regs[i].addr == 0xffffffff)
+			break;  /* invalid entries */
+		if (regs[i].val == 0x0ull)
+			continue;  /* do not print 0x0 FIRs */
+		seq_printf(s, "  0x%08x 0x%016llx\n",
+			   regs[i].addr, regs[i].val);
+	}
+	kfree(regs);	/* temporary snapshot; was leaked on every read */
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(curr_regs, genwqe_curr_regs_show);
+
+static int genwqe_prev_regs_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	struct genwqe_reg *r = cd->ffdc[GENWQE_DBG_REGS].regs;
+	struct genwqe_reg *end;
+
+	if (!r)
+		return -EINVAL;	/* no register snapshot stored */
+
+	/* Walk until the table ends or the 0xffffffff marker shows up */
+	for (end = r + GENWQE_FFDC_REGS; r < end; r++) {
+		if (r->addr == 0xffffffff)
+			break;
+
+		if (r->val != 0x0ull)	/* all-zero FIRs are not printed */
+			seq_printf(s, "  0x%08x 0x%016llx\n",
+				   r->addr, r->val);
+	}
+
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(prev_regs, genwqe_prev_regs_show);
+
+static int genwqe_jtimer_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int vf;
+	u64 raw;
+
+	/* Function 0 is shown as PF, next to the driver-global timeout */
+	raw = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 0);
+	seq_printf(s, "  PF   0x%016llx %d msec\n",
+		   raw, genwqe_pf_jobtimeout_msec);
+
+	for (vf = 0; vf < cd->num_vfs; vf++) {	/* VF n is function n + 1 */
+		raw = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, vf + 1);
+		seq_printf(s, "  VF%-2d 0x%016llx %d msec\n",
+			   vf, raw, cd->vf_jobtimeout_msec[vf]);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(jtimer, genwqe_jtimer_show);
+
+static int genwqe_queue_working_time_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int vf = 0;
+	u64 wtime = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, 0);
+
+	/* Function 0 is reported as PF, VF n is read as function n + 1 */
+	seq_printf(s, "  PF   0x%016llx\n", wtime);
+	while (vf < cd->num_vfs) {
+		wtime = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, vf + 1);
+		seq_printf(s, "  VF%-2d 0x%016llx\n", vf, wtime);
+		vf++;
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(queue_working_time, genwqe_queue_working_time_show);
+
+static int genwqe_ddcb_info_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct ddcb_queue *queue;
+	struct ddcb *pddcb;
+
+	queue = &cd->queue;
+	seq_puts(s, "DDCB QUEUE:\n");	/* driver-side bookkeeping first */
+	seq_printf(s, "  ddcb_max:            %d\n"
+		   "  ddcb_daddr:          %016llx - %016llx\n"
+		   "  ddcb_vaddr:          %016llx\n"
+		   "  ddcbs_in_flight:     %u\n"
+		   "  ddcbs_max_in_flight: %u\n"
+		   "  ddcbs_completed:     %u\n"
+		   "  busy:                %u\n"
+		   "  irqs_processed:      %u\n",
+		   queue->ddcb_max, (long long)queue->ddcb_daddr,
+		   (long long)queue->ddcb_daddr +
+		   (queue->ddcb_max * DDCB_LENGTH),
+		   (long long)queue->ddcb_vaddr, queue->ddcbs_in_flight,
+		   queue->ddcbs_max_in_flight, queue->ddcbs_completed,
+		   queue->busy, cd->irqs_processed);
+
+	/* Hardware State: register offset, then the value read from it */
+	seq_printf(s, "  0x%08x 0x%016llx IO_QUEUE_CONFIG\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_STATUS\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_SEGMENT\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_INITSQN\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_WRAP\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_OFFSET\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_WTIME\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_ERRCNTS\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_LRW\n",
+		   queue->IO_QUEUE_CONFIG,
+		   __genwqe_readq(cd, queue->IO_QUEUE_CONFIG),
+		   queue->IO_QUEUE_STATUS,
+		   __genwqe_readq(cd, queue->IO_QUEUE_STATUS),
+		   queue->IO_QUEUE_SEGMENT,
+		   __genwqe_readq(cd, queue->IO_QUEUE_SEGMENT),
+		   queue->IO_QUEUE_INITSQN,
+		   __genwqe_readq(cd, queue->IO_QUEUE_INITSQN),
+		   queue->IO_QUEUE_WRAP,
+		   __genwqe_readq(cd, queue->IO_QUEUE_WRAP),
+		   queue->IO_QUEUE_OFFSET,
+		   __genwqe_readq(cd, queue->IO_QUEUE_OFFSET),
+		   queue->IO_QUEUE_WTIME,
+		   __genwqe_readq(cd, queue->IO_QUEUE_WTIME),
+		   queue->IO_QUEUE_ERRCNTS,
+		   __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS),
+		   queue->IO_QUEUE_LRW,
+		   __genwqe_readq(cd, queue->IO_QUEUE_LRW));
+
+	seq_printf(s, "DDCB list (ddcb_act=%d/ddcb_next=%d):\n",
+		   queue->ddcb_act, queue->ddcb_next);
+
+	pddcb = queue->ddcb_vaddr;	/* walk all ddcb_max entries in order */
+	for (i = 0; i < queue->ddcb_max; i++) {
+		seq_printf(s, "  %-3d: RETC=%03x SEQ=%04x HSI/SHI=%02x/%02x ",
+			   i, be16_to_cpu(pddcb->retc_16),
+			   be16_to_cpu(pddcb->seqnum_16),
+			   pddcb->hsi, pddcb->shi);
+		seq_printf(s, "PRIV=%06llx CMD=%02x\n",
+			   be64_to_cpu(pddcb->priv_64), pddcb->cmd);
+		pddcb++;
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(ddcb_info, genwqe_ddcb_info_show);
+
+static int genwqe_info_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	u64 app_id, slu_id, bitstream = -1;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	app_id = __genwqe_readq(cd, IO_APP_UNITCFG);
+
+	if (genwqe_is_privileged(cd))	/* else bitstream stays all ones */
+		bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM);
+
+	/*
+	 * Build date, architecture and SVN release are bit fields of
+	 * the SLU unit configuration word; they are decoded inline.
+	 */
+	seq_printf(s, "%s driver version: %s\n"
+		   "    Device Name/Type: %s %s CardIdx: %d\n"
+		   "    SLU/APP Config  : 0x%016llx/0x%016llx\n"
+		   "    Build Date      : %u/%x/%u\n"
+		   "    Base Clock      : %u MHz\n"
+		   "    Arch/SVN Release: %u/%llx\n"
+		   "    Bitstream       : %llx\n",
+		   GENWQE_DEVNAME, DRV_VERS_STRING, dev_name(&pci_dev->dev),
+		   genwqe_is_privileged(cd) ?
+		   "Physical" : "Virtual or no SR-IOV",
+		   cd->card_idx, slu_id, app_id,
+		   (u16)((slu_id >> 12) & 0x0fLLU),	   /* month */
+		   (u16)((slu_id >>  4) & 0xffLLU),	   /* day */
+		   (u16)((slu_id >> 16) & 0x0fLLU) + 2010, /* year */
+		   genwqe_base_clock_frequency(cd),
+		   (u16)((slu_id >> 32) & 0xffLLU), slu_id >> 40,
+		   bitstream);
+
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(info, genwqe_info_show);
+
+int genwqe_init_debugfs(struct genwqe_dev *cd)
+{
+	struct dentry *root;
+	struct dentry *file;
+	int ret;
+	char card_name[64];	/* name of the per-card directory */
+	char name[64];		/* scratch for the per-VF file names */
+	unsigned int i;
+
+	sprintf(card_name, "%s%u_card", GENWQE_DEVNAME, cd->card_idx);
+
+	root = debugfs_create_dir(card_name, cd->debugfs_genwqe);
+	if (!root) {
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	/* non privileged interfaces: created for every function */
+	file = debugfs_create_file("ddcb_info", S_IRUGO, root, cd,
+				   &genwqe_ddcb_info_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("info", S_IRUGO, root, cd,
+				   &genwqe_info_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_x64("err_inject", 0666, root, &cd->err_inject);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("ddcb_software_timeout", 0666, root,
+				  &cd->ddcb_software_timeout);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("kill_timeout", 0666, root,
+				  &cd->kill_timeout);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	/* privileged interfaces follow here; others stop with success */
+	if (!genwqe_is_privileged(cd)) {
+		cd->debugfs_root = root;
+		return 0;
+	}
+
+	file = debugfs_create_file("curr_regs", S_IRUGO, root, cd,
+				   &genwqe_curr_regs_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid0", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid0_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid1", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid1_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid2", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid2_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_regs", S_IRUGO, root, cd,
+				   &genwqe_prev_regs_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid0", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid0_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid1", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid1_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid2", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid2_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	for (i = 0; i <  GENWQE_MAX_VFS; i++) {	/* one file per possible VF */
+		sprintf(name, "vf%d_jobtimeout_msec", i);
+
+		file = debugfs_create_u32(name, 0666, root,
+					  &cd->vf_jobtimeout_msec[i]);
+		if (!file) {
+			ret = -ENOMEM;
+			goto err1;
+		}
+	}
+
+	file = debugfs_create_file("jobtimer", S_IRUGO, root, cd,
+				   &genwqe_jtimer_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("queue_working_time", S_IRUGO, root, cd,
+				   &genwqe_queue_working_time_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("skip_recovery", 0666, root,
+				  &cd->skip_recovery);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	cd->debugfs_root = root;	/* kept for genqwe_exit_debugfs() */
+	return 0;
+err1:
+	debugfs_remove_recursive(root);	/* drops root and all files in it */
+err0:
+	return ret;
+}
+
+void genqwe_exit_debugfs(struct genwqe_dev *cd)
+{
+	debugfs_remove_recursive(cd->debugfs_root);	/* the per-card dir */
+}
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
new file mode 100644
index 000000000000..8f8a6b327cdb
--- /dev/null
+++ b/drivers/misc/genwqe/card_dev.c
@@ -0,0 +1,1414 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Character device representation of the GenWQE device. This allows
+ * user-space applications to communicate with the card.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/**
+ * genwqe_open_files() - Check whether any file is registered for the card
+ * @cd:	genwqe device descriptor
+ *
+ * The file list is inspected while holding cd->file_lock.
+ *
+ * Return: 1 if cd->file_list is not empty, 0 otherwise
+ */
+static int genwqe_open_files(struct genwqe_dev *cd)
+{
+	unsigned long irq_flags;
+	int have_files;
+
+	spin_lock_irqsave(&cd->file_lock, irq_flags);
+	have_files = !list_empty(&cd->file_list);
+	spin_unlock_irqrestore(&cd->file_lock, irq_flags);
+
+	return have_files;
+}
+
+/**
+ * genwqe_add_file() - Register an opened file with its card
+ * @cd:	genwqe device descriptor
+ * @cfile:	descriptor of opened file
+ *
+ * Records the calling task as owner of @cfile and links @cfile into
+ * cd->file_list while holding cd->file_lock.
+ */
+static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+	unsigned long irq_flags;
+
+	cfile->owner = current;
+
+	spin_lock_irqsave(&cd->file_lock, irq_flags);
+	list_add(&cfile->list, &cd->file_list);
+	spin_unlock_irqrestore(&cd->file_lock, irq_flags);
+}
+
+/**
+ * genwqe_del_file() - Unregister an opened file from its card
+ * @cd:	genwqe device descriptor
+ * @cfile:	descriptor of opened file
+ *
+ * Unlinks @cfile from the card's file list while holding cd->file_lock.
+ *
+ * Return: always 0
+ */
+static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&cd->file_lock, irq_flags);
+	list_del(&cfile->list);
+	spin_unlock_irqrestore(&cd->file_lock, irq_flags);
+	return 0;
+}
+
+/**
+ * genwqe_add_pin() - Remember a pinned user memory area for this file
+ * @cfile:	descriptor of opened file
+ * @m:		mapping describing the pinned area
+ *
+ * Links @m into cfile->pin_list while holding cfile->pin_lock.
+ */
+static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&cfile->pin_lock, irq_flags);
+	list_add(&m->pin_list, &cfile->pin_list);
+	spin_unlock_irqrestore(&cfile->pin_lock, irq_flags);
+}
+
+/**
+ * genwqe_del_pin() - Forget a pinned user memory area of this file
+ * @cfile:	descriptor of opened file
+ * @m:		mapping describing the pinned area
+ *
+ * Unlinks @m from the pin list while holding cfile->pin_lock. The
+ * mapping itself is neither unmapped nor freed here.
+ *
+ * Return: always 0
+ */
+static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&cfile->pin_lock, irq_flags);
+	list_del(&m->pin_list);
+	spin_unlock_irqrestore(&cfile->pin_lock, irq_flags);
+	return 0;
+}
+
+/**
+ * genwqe_search_pin() - Search for the pinning covering a userspace range
+ * @cfile:	Descriptor of opened file
+ * @u_addr:	User virtual address
+ * @size:	Size of buffer
+ * @virt_addr:	If not NULL, receives the kernel virtual address which
+ *		corresponds to @u_addr within the found pinning
+ *
+ * Walks cfile->pin_list under cfile->pin_lock and picks the first
+ * entry whose user range fully contains [@u_addr, @u_addr + @size].
+ *
+ * Return: Pointer to the corresponding pinning, NULL if not found
+ */
+static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile,
+					    unsigned long u_addr,
+					    unsigned int size,
+					    void **virt_addr)
+{
+	unsigned long irq_flags;
+	struct dma_mapping *pin;
+
+	spin_lock_irqsave(&cfile->pin_lock, irq_flags);
+	list_for_each_entry(pin, &cfile->pin_list, pin_list) {
+		u64 start = (u64)pin->u_vaddr;
+
+		/* skip entries which do not contain the whole range */
+		if ((start > u_addr) ||
+		    (start + pin->size < u_addr + size))
+			continue;
+
+		if (virt_addr)
+			*virt_addr = pin->k_vaddr + (u_addr - start);
+
+		spin_unlock_irqrestore(&cfile->pin_lock, irq_flags);
+		return pin;
+	}
+	spin_unlock_irqrestore(&cfile->pin_lock, irq_flags);
+
+	return NULL;
+}
+
+/**
+ * __genwqe_add_mapping() - Remember a DMA mapping for this file
+ * @cfile:	descriptor of opened file
+ * @dma_map:	mapping to be remembered
+ *
+ * Links @dma_map into cfile->map_list while holding cfile->map_lock.
+ */
+static void __genwqe_add_mapping(struct genwqe_file *cfile,
+			      struct dma_mapping *dma_map)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&cfile->map_lock, irq_flags);
+	list_add(&dma_map->card_list, &cfile->map_list);
+	spin_unlock_irqrestore(&cfile->map_lock, irq_flags);
+}
+
+/**
+ * __genwqe_del_mapping() - Forget a DMA mapping of this file
+ * @cfile:	descriptor of opened file
+ * @dma_map:	mapping to be forgotten
+ *
+ * Unlinks @dma_map from the mapping list while holding
+ * cfile->map_lock. The mapping itself is not released here.
+ */
+static void __genwqe_del_mapping(struct genwqe_file *cfile,
+			      struct dma_mapping *dma_map)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&cfile->map_lock, irq_flags);
+	list_del(&dma_map->card_list);
+	spin_unlock_irqrestore(&cfile->map_lock, irq_flags);
+}
+
+
+/**
+ * __genwqe_search_mapping() - Search for the mapping for a userspace address
+ * @cfile:	descriptor of opened file
+ * @u_addr:	user virtual address
+ * @size:	size of buffer
+ * @dma_addr:	if not NULL, receives the DMA address matching @u_addr
+ * @virt_addr:	if not NULL, receives the kernel virtual address
+ *		matching @u_addr
+ *
+ * Walks cfile->map_list under cfile->map_lock and picks the first
+ * entry whose user range fully contains [@u_addr, @u_addr + @size].
+ * An error is logged if no entry matches.
+ *
+ * Return: Pointer to the corresponding mapping, NULL if not found
+ */
+static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile,
+						   unsigned long u_addr,
+						   unsigned int size,
+						   dma_addr_t *dma_addr,
+						   void **virt_addr)
+{
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+	unsigned long irq_flags;
+	struct dma_mapping *map;
+
+	spin_lock_irqsave(&cfile->map_lock, irq_flags);
+	list_for_each_entry(map, &cfile->map_list, card_list) {
+		u64 start = (u64)map->u_vaddr;
+
+		/* skip entries which do not contain the whole range */
+		if ((start > u_addr) ||
+		    (start + map->size < u_addr + size))
+			continue;
+
+		/* hit: translate the user address for the caller */
+		if (dma_addr)
+			*dma_addr = map->dma_addr + (u_addr - start);
+
+		if (virt_addr)
+			*virt_addr = map->k_vaddr + (u_addr - start);
+
+		spin_unlock_irqrestore(&cfile->map_lock, irq_flags);
+		return map;
+	}
+	spin_unlock_irqrestore(&cfile->map_lock, irq_flags);
+
+	dev_err(&pci_dev->dev,
+		"[%s] Entry not found: u_addr=%lx, size=%x\n",
+		__func__, u_addr, size);
+
+	return NULL;
+}
+
+/**
+ * genwqe_remove_mappings() - Release all mappings still listed for a file
+ * @cfile:	descriptor of opened file
+ *
+ * Every entry left on cfile->map_list is reported via dev_err(),
+ * unlinked and released according to its type.
+ */
+static void genwqe_remove_mappings(struct genwqe_file *cfile)
+{
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+	struct dma_mapping *map, *tmp;
+	int nr = 0;
+
+	list_for_each_entry_safe(map, tmp, &cfile->map_list, card_list) {
+		list_del_init(&map->card_list);
+
+		/*
+		 * Finding an entry here indicates a bug: a
+		 * GENWQE_MAPPING_RAW entry should already have been
+		 * released when its vma was closed, and a
+		 * GENWQE_MAPPING_SGL_TEMP entry by the request tidy
+		 * up code.
+		 */
+		dev_err(&pci_dev->dev,
+			"[%s] %d. cleanup mapping: u_vaddr=%p "
+			"u_kaddr=%016lx dma_addr=%lx\n", __func__, nr++,
+			map->u_vaddr, (unsigned long)map->k_vaddr,
+			(unsigned long)map->dma_addr);
+
+		switch (map->type) {
+		case GENWQE_MAPPING_RAW:
+			/* buffer and descriptor were allocated dynamically */
+			__genwqe_free_consistent(cd, map->size,
+						map->k_vaddr,
+						map->dma_addr);
+			kfree(map);
+			break;
+		case GENWQE_MAPPING_SGL_TEMP:
+			/* descriptor is embedded in the request, no kfree */
+			genwqe_user_vunmap(cd, map, NULL);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * genwqe_remove_pinnings() - Release all pinnings still listed for a file
+ * @cfile:	descriptor of opened file
+ *
+ * Leftover pinnings are not a bug: a killed process might never have
+ * called the unpin ioctl which is supposed to free them. Pinnings
+ * are dynamically allocated, so each one is unlinked, unmapped and
+ * freed here.
+ */
+static void genwqe_remove_pinnings(struct genwqe_file *cfile)
+{
+	struct genwqe_dev *cd = cfile->cd;
+	struct dma_mapping *pin, *tmp;
+
+	list_for_each_entry_safe(pin, tmp, &cfile->pin_list, pin_list) {
+		list_del_init(&pin->pin_list);
+		genwqe_user_vunmap(cd, pin, NULL);
+		kfree(pin);
+	}
+}
+
+/**
+ * genwqe_kill_fasync() - Notify all files which asked for async notification
+ * @cd:	genwqe device descriptor
+ * @sig:	signal to be sent, e.g. SIGIO
+ *
+ * Calls kill_fasync() with POLL_HUP for every file on cd->file_list
+ * which has an async queue. The list is walked under cd->file_lock.
+ *
+ * Return: number of files on the list, with or without an async queue
+ */
+static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig)
+{
+	struct genwqe_file *cfile;
+	unsigned long irq_flags;
+	unsigned int nr_files = 0;
+
+	spin_lock_irqsave(&cd->file_lock, irq_flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		nr_files++;
+		if (!cfile->async_queue)
+			continue;
+		kill_fasync(&cfile->async_queue, sig, POLL_HUP);
+	}
+	spin_unlock_irqrestore(&cd->file_lock, irq_flags);
+
+	return nr_files;
+}
+
+/**
+ * genwqe_force_sig() - Force a signal on the owner of every open file
+ * @cd:	genwqe device descriptor
+ * @sig:	signal to be forced
+ *
+ * Calls force_sig() for the owner task recorded in each entry of
+ * cd->file_list. The list is walked under cd->file_lock.
+ *
+ * Return: number of files on the list
+ */
+static int genwqe_force_sig(struct genwqe_dev *cd, int sig)
+{
+	struct genwqe_file *cfile;
+	unsigned long irq_flags;
+	unsigned int nr_files = 0;
+
+	spin_lock_irqsave(&cd->file_lock, irq_flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		force_sig(sig, cfile->owner);
+		nr_files++;
+	}
+	spin_unlock_irqrestore(&cd->file_lock, irq_flags);
+
+	return nr_files;
+}
+
+/**
+ * genwqe_open() - file open
+ * @inode:      file system information
+ * @filp:	file handle
+ *
+ * This function is executed whenever an application calls
+ * open("/dev/genwqe",..). It allocates the per-file descriptor,
+ * initializes its mapping and pinning lists, stores it in
+ * filp->private_data and registers it with the card.
+ *
+ * Return: 0 if successful, -ENOMEM if the descriptor allocation failed
+ */
+static int genwqe_open(struct inode *inode, struct file *filp)
+{
+	struct genwqe_file *cfile;
+	struct genwqe_dev *cd;
+
+	cfile = kzalloc(sizeof(*cfile), GFP_KERNEL);
+	if (!cfile)
+		return -ENOMEM;
+
+	/* the cdev is embedded in the card descriptor */
+	cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe);
+
+	cfile->cd = cd;
+	cfile->filp = filp;
+	cfile->client = NULL;
+
+	/* list of raw memory allocations */
+	spin_lock_init(&cfile->map_lock);
+	INIT_LIST_HEAD(&cfile->map_list);
+
+	/* list of user pinned memory */
+	spin_lock_init(&cfile->pin_lock);
+	INIT_LIST_HEAD(&cfile->pin_list);
+
+	filp->private_data = cfile;
+	genwqe_add_file(cd, cfile);
+
+	return 0;
+}
+
+/**
+ * genwqe_fasync() - Setup process to receive SIGIO.
+ * @fd:        file descriptor
+ * @filp:      file handle
+ * @mode:      file mode
+ *
+ * Hands the request over to fasync_helper() using the async queue of
+ * the per-file descriptor found in filp->private_data.
+ *
+ * Sending a signal then works as follows:
+ *
+ * if (cdev->async_queue)
+ *         kill_fasync(&cdev->async_queue, SIGIO, POLL_IN);
+ *
+ * Some devices also implement asynchronous notification to indicate
+ * when the device can be written; in this case, of course,
+ * kill_fasync must be called with a mode of POLL_OUT.
+ *
+ * Return: result of fasync_helper()
+ */
+static int genwqe_fasync(int fd, struct file *filp, int mode)
+{
+	struct genwqe_file *cfile = filp->private_data;
+
+	return fasync_helper(fd, filp, mode, &cfile->async_queue);
+}
+
+
+/**
+ * genwqe_release() - file close
+ * @inode:      file system information
+ * @filp:       file handle
+ *
+ * This function is executed whenever an application calls 'close(fd_genwqe)'.
+ * It releases leftover mappings and pinnings, drops the file from the
+ * async notification list, unregisters it from the card and frees
+ * the per-file descriptor.
+ *
+ * Return: always 0
+ */
+static int genwqe_release(struct inode *inode, struct file *filp)
+{
+	struct genwqe_file *cfile = filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+
+	/* both lists are expected to be empty at this point */
+	genwqe_remove_mappings(cfile);
+	genwqe_remove_pinnings(cfile);
+
+	/* stop asynchronous notification for this filp */
+	genwqe_fasync(-1, filp, 0);
+
+	/*
+	 * cd must not be released before this cfile is released.
+	 * Otherwise the list entry is invalid, because the list
+	 * itself gets reinstantiated!
+	 */
+	genwqe_del_file(cd, cfile);
+
+	kfree(cfile);
+	return 0;
+}
+
+/**
+ * genwqe_vma_open() - vma open callback, installed via genwqe_vma_ops
+ * @vma:	virtual memory area
+ *
+ * Intentionally empty.
+ */
+static void genwqe_vma_open(struct vm_area_struct *vma)
+{
+	/* nothing ... */
+}
+
+/**
+ * genwqe_vma_close() - Called each time when vma is unmapped
+ * @vma:	virtual memory area which goes away
+ *
+ * Free memory which got allocated by GenWQE mmap(). The mapping is
+ * looked up by the start address and size of @vma; if it cannot be
+ * found an error is logged and nothing is freed.
+ */
+static void genwqe_vma_close(struct vm_area_struct *vma)
+{
+	struct genwqe_file *cfile = vma->vm_private_data;
+	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev,
+					    cdev_genwqe);
+	struct pci_dev *pci_dev = cd->pci_dev;
+	unsigned long len = vma->vm_end - vma->vm_start;
+	struct dma_mapping *map;
+	dma_addr_t d_addr = 0;
+
+	map = __genwqe_search_mapping(cfile, vma->vm_start, len,
+				      &d_addr, NULL);
+	if (!map) {
+		dev_err(&pci_dev->dev,
+			"  [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n",
+			__func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+			len);
+		return;
+	}
+
+	/* unlink first, then release the DMA buffer and its descriptor */
+	__genwqe_del_mapping(cfile, map);
+	__genwqe_free_consistent(cd, map->size, map->k_vaddr, map->dma_addr);
+	kfree(map);
+}
+
+/* vma callbacks installed by genwqe_mmap(); close frees the DMA buffer */
+static struct vm_operations_struct genwqe_vma_ops = {
+	.open   = genwqe_vma_open,
+	.close  = genwqe_vma_close,
+};
+
+/**
+ * genwqe_mmap() - Provide contiguous buffers to userspace
+ * @filp:	file handle
+ * @vma:	virtual memory area to be populated
+ *
+ * We use mmap() to allocate contiguous buffers used for DMA
+ * transfers. After the buffer is allocated we remap it to user-space
+ * and remember a reference to our dma_mapping data structure, where
+ * we store the associated DMA address and allocated size.
+ *
+ * When we receive a DDCB execution request with the ATS bits set to
+ * plain buffer, we lookup our dma_mapping list to find the
+ * corresponding DMA address for the associated user-space address.
+ *
+ * Return: 0 if successful, -EINVAL for an empty area, -ENOMEM if the
+ * area is too large or an allocation failed, -EFAULT if remapping
+ * to user-space failed
+ */
+static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct genwqe_file *cfile = filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+	unsigned long len = vma->vm_end - vma->vm_start;
+	struct dma_mapping *map;
+	unsigned long pfn;
+	int rc;
+
+	if (len == 0)
+		return -EINVAL;
+
+	if (get_order(len) > MAX_ORDER)
+		return -ENOMEM;
+
+	map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	if (!map)
+		return -ENOMEM;
+
+	genwqe_mapping_init(map, GENWQE_MAPPING_RAW);
+	map->u_vaddr = (void *)vma->vm_start;
+	map->size = len;
+	map->nr_pages = DIV_ROUND_UP(len, PAGE_SIZE);
+	map->k_vaddr = __genwqe_alloc_consistent(cd, len, &map->dma_addr);
+	if (!map->k_vaddr) {
+		rc = -ENOMEM;
+		goto out_free_map;
+	}
+
+	/* privileged callers find the DMA address at the buffer start */
+	if (capable(CAP_SYS_ADMIN) && (len > sizeof(dma_addr_t)))
+		*(dma_addr_t *)map->k_vaddr = map->dma_addr;
+
+	pfn = virt_to_phys(map->k_vaddr) >> PAGE_SHIFT;
+	if (remap_pfn_range(vma, vma->vm_start, pfn, len,
+			    vma->vm_page_prot) != 0) {
+		rc = -EFAULT;
+		goto out_free_mem;
+	}
+
+	vma->vm_private_data = cfile;
+	vma->vm_ops = &genwqe_vma_ops;
+	__genwqe_add_mapping(cfile, map);
+
+	return 0;
+
+ out_free_mem:
+	__genwqe_free_consistent(cd, map->size, map->k_vaddr, map->dma_addr);
+ out_free_map:
+	kfree(map);
+	return rc;
+}
+
+#define	FLASH_BLOCK	0x40000	/* we use 256k blocks */
+
+/**
+ * do_flash_update() - Execute flash update (write image to a partition)
+ * @cfile:	descriptor of opened file
+ * @load:	details about image load
+ *
+ * The image at @load->data_addr is copied from user-space in chunks
+ * of at most FLASH_BLOCK bytes into a DMA buffer and one
+ * SLCMD_MOVE_FLASH DDCB is executed per chunk. @load->size is
+ * decremented by the bytes written so far; @load->retc, @load->attn
+ * and @load->progress reflect the last executed DDCB.
+ *
+ * Only partitions '0' and '1' are accepted. @load->size must be a
+ * multiple of 4 and @load->data_addr must be page aligned.
+ *
+ * Return: 0 if successful, -EINVAL for invalid arguments, -ENOMEM if
+ * an allocation failed, -EFAULT if the user buffer could not be read,
+ * -EIO if a DDCB did not finish with DDCB_RETC_COMPLETE, or the
+ * negative return code of __genwqe_execute_raw_ddcb()
+ */
+static int do_flash_update(struct genwqe_file *cfile,
+			   struct genwqe_bitstream *load)
+{
+	int rc = 0;
+	int blocks_to_flash;
+	dma_addr_t dma_addr;
+	u64 flash = 0;
+	size_t tocopy = 0;
+	u8 __user *buf;
+	u8 *xbuf;
+	u32 crc;
+	u8 cmdopts;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if ((load->size & 0x3) != 0)
+		return -EINVAL;
+
+	if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+		return -EINVAL;
+
+	/* FIXME Bits have changed for new service layer! */
+	switch ((char)load->partition) {
+	case '0':
+		cmdopts = 0x14;
+		break;		/* download/erase_first/part_0 */
+	case '1':
+		cmdopts = 0x1C;
+		break;		/* download/erase_first/part_1 */
+	case 'v':		/* cmdopts = 0x0c (VPD) */
+	default:
+		return -EINVAL;
+	}
+
+	buf = (u8 __user *)load->data_addr;
+	xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+	if (xbuf == NULL)
+		return -ENOMEM;
+
+	blocks_to_flash = load->size / FLASH_BLOCK;
+	while (load->size) {
+		struct genwqe_ddcb_cmd *req;
+
+		/*
+		 * We must be 4 byte aligned. Buffer must be 0 appended
+		 * to have defined values when calculating CRC.
+		 */
+		tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+		rc = copy_from_user(xbuf, buf, tocopy);
+		if (rc) {
+			rc = -EFAULT;
+			goto free_buffer;
+		}
+		crc = genwqe_crc32(xbuf, tocopy, 0xffffffff);
+
+		/* tocopy is a size_t, hence %zu */
+		dev_dbg(&pci_dev->dev,
+			"[%s] DMA: %lx CRC: %08x SZ: %zu %d\n",
+			__func__, (unsigned long)dma_addr, crc, tocopy,
+			blocks_to_flash);
+
+		/* prepare DDCB for SLU process */
+		req = ddcb_requ_alloc();
+		if (req == NULL) {
+			rc = -ENOMEM;
+			goto free_buffer;
+		}
+
+		req->cmd = SLCMD_MOVE_FLASH;
+		req->cmdopts = cmdopts;
+
+		/* prepare invariant values */
+		if (genwqe_get_slu_id(cd) <= 0x2) {
+			*(__be64 *)&req->__asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be64 *)&req->__asiv[8]  = cpu_to_be64(tocopy);
+			*(__be64 *)&req->__asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&req->__asiv[24] = cpu_to_be32(0);
+			req->__asiv[24]	       = load->uid;
+			*(__be32 *)&req->__asiv[28] = cpu_to_be32(crc);
+
+			/* for simulation only */
+			*(__be64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id);
+			*(__be64 *)&req->__asiv[96] = cpu_to_be64(load->app_id);
+			req->asiv_length = 32; /* bytes included in crc calc */
+		} else {	/* setup DDCB for ATS architecture */
+			*(__be64 *)&req->asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be32 *)&req->asiv[8]  = cpu_to_be32(tocopy);
+			*(__be32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */
+			*(__be64 *)&req->asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24);
+			*(__be32 *)&req->asiv[28] = cpu_to_be32(crc);
+
+			/* for simulation only */
+			*(__be64 *)&req->asiv[80] = cpu_to_be64(load->slu_id);
+			*(__be64 *)&req->asiv[88] = cpu_to_be64(load->app_id);
+
+			/* Rd only */
+			req->ats = 0x4ULL << 44;
+			req->asiv_length = 40; /* bytes included in crc calc */
+		}
+		req->asv_length  = 8;
+
+		/* For Genwqe5 we get back the calculated CRC */
+		*(u64 *)&req->asv[0] = 0ULL;			/* 0x80 */
+
+		rc = __genwqe_execute_raw_ddcb(cd, req);
+
+		/* report the DDCB status to the caller, even on failure */
+		load->retc = req->retc;
+		load->attn = req->attn;
+		load->progress = req->progress;
+
+		if (rc < 0) {
+			ddcb_requ_free(req);
+			goto free_buffer;
+		}
+
+		if (req->retc != DDCB_RETC_COMPLETE) {
+			rc = -EIO;
+			ddcb_requ_free(req);
+			goto free_buffer;
+		}
+
+		/* advance to the next chunk */
+		load->size  -= tocopy;
+		flash += tocopy;
+		buf += tocopy;
+		blocks_to_flash--;
+		ddcb_requ_free(req);
+	}
+
+ free_buffer:
+	__genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+	return rc;
+}
+
+/**
+ * do_flash_read() - Execute flash read (read back a partition)
+ * @cfile:	descriptor of opened file
+ * @load:	details about the image to read
+ *
+ * One SLCMD_MOVE_FLASH DDCB is executed per chunk of at most
+ * FLASH_BLOCK bytes and the resulting DMA buffer content is copied
+ * to user-space at @load->data_addr. @load->size is decremented by
+ * the bytes read so far; @load->retc, @load->attn and @load->progress
+ * reflect the last executed DDCB.
+ *
+ * Only partitions '0' and '1' are accepted. @load->size must be a
+ * multiple of 4 and @load->data_addr must be page aligned.
+ *
+ * Return: 0 if successful, -EINVAL for invalid arguments, -ENOMEM if
+ * an allocation failed, -EFAULT if the user buffer could not be
+ * written, -EIO for an unexpected retc/attn combination, or the
+ * negative return code of __genwqe_execute_raw_ddcb() (-EBADMSG is
+ * tolerated)
+ */
+static int do_flash_read(struct genwqe_file *cfile,
+			 struct genwqe_bitstream *load)
+{
+	int rc, blocks_to_flash;
+	dma_addr_t dma_addr;
+	u64 flash = 0;
+	size_t tocopy = 0;
+	u8 __user *buf;
+	u8 *xbuf;
+	u8 cmdopts;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct genwqe_ddcb_cmd *cmd;
+
+	if ((load->size & 0x3) != 0)
+		return -EINVAL;
+
+	if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+		return -EINVAL;
+
+	/* FIXME Bits have changed for new service layer! */
+	switch ((char)load->partition) {
+	case '0':
+		cmdopts = 0x12;
+		break;		/* upload/part_0 */
+	case '1':
+		cmdopts = 0x1A;
+		break;		/* upload/part_1 */
+	case 'v':
+	default:
+		return -EINVAL;
+	}
+
+	buf = (u8 __user *)load->data_addr;
+	xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+	if (xbuf == NULL)
+		return -ENOMEM;
+
+	blocks_to_flash = load->size / FLASH_BLOCK;
+	while (load->size) {
+		/*
+		 * We must be 4 byte aligned. Buffer must be 0 appended
+		 * to have defined values when calculating CRC.
+		 */
+		tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+		/* tocopy is a size_t, hence %zu */
+		dev_dbg(&pci_dev->dev,
+			"[%s] DMA: %lx SZ: %zu %d\n",
+			__func__, (unsigned long)dma_addr, tocopy,
+			blocks_to_flash);
+
+		/* prepare DDCB for SLU process */
+		cmd = ddcb_requ_alloc();
+		if (cmd == NULL) {
+			rc = -ENOMEM;
+			goto free_buffer;
+		}
+		cmd->cmd = SLCMD_MOVE_FLASH;
+		cmd->cmdopts = cmdopts;
+
+		/* prepare invariant values */
+		if (genwqe_get_slu_id(cd) <= 0x2) {
+			*(__be64 *)&cmd->__asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be64 *)&cmd->__asiv[8]  = cpu_to_be64(tocopy);
+			*(__be64 *)&cmd->__asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&cmd->__asiv[24] = cpu_to_be32(0);
+			cmd->__asiv[24] = load->uid;
+			*(__be32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */;
+			cmd->asiv_length = 32; /* bytes included in crc calc */
+		} else {	/* setup DDCB for ATS architecture */
+			*(__be64 *)&cmd->asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be32 *)&cmd->asiv[8]  = cpu_to_be32(tocopy);
+			*(__be32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */
+			*(__be64 *)&cmd->asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24);
+			*(__be32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */
+
+			/* rd/wr */
+			cmd->ats = 0x5ULL << 44;
+			cmd->asiv_length = 40; /* bytes included in crc calc */
+		}
+		cmd->asv_length  = 8;
+
+		/* we only get back the calculated CRC */
+		*(u64 *)&cmd->asv[0] = 0ULL;	/* 0x80 */
+
+		rc = __genwqe_execute_raw_ddcb(cd, cmd);
+
+		/* report the DDCB status to the caller, even on failure */
+		load->retc = cmd->retc;
+		load->attn = cmd->attn;
+		load->progress = cmd->progress;
+
+		/* -EBADMSG is tolerated; retc/attn are examined below */
+		if ((rc < 0) && (rc != -EBADMSG)) {
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		rc = copy_to_user(buf, xbuf, tocopy);
+		if (rc) {
+			rc = -EFAULT;
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		/* We know that we can get retc 0x104 with CRC err */
+		if (((cmd->retc == DDCB_RETC_FAULT) &&
+		     (cmd->attn != 0x02)) ||  /* Normally ignore CRC error */
+		    ((cmd->retc == DDCB_RETC_COMPLETE) &&
+		     (cmd->attn != 0x00))) {  /* Everything was fine */
+			rc = -EIO;
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		/* advance to the next chunk */
+		load->size  -= tocopy;
+		flash += tocopy;
+		buf += tocopy;
+		blocks_to_flash--;
+		ddcb_requ_free(cmd);
+	}
+	rc = 0;
+
+ free_buffer:
+	__genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+	return rc;
+}
+
+/**
+ * genwqe_pin_mem() - Pin a user memory area and remember the pinning
+ * @cfile:	descriptor of opened file
+ * @m:		user supplied address and size of the area
+ *
+ * The area is extended to full pages, mapped via genwqe_user_vmap()
+ * and put on the pin list of @cfile.
+ *
+ * Return: 0 if successful, -EINVAL if address or size are 0, -ENOMEM
+ * if the descriptor allocation failed, or the return code of
+ * genwqe_user_vmap()
+ */
+static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+	int rc;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+	struct dma_mapping *dma_map;
+	unsigned long map_addr;
+	unsigned long map_size;
+
+	if ((m->addr == 0x0) || (m->size == 0))
+		return -EINVAL;
+
+	/* start at the page boundary and cover whole pages */
+	map_addr = (m->addr & PAGE_MASK);
+	map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	if (dma_map == NULL)
+		return -ENOMEM;
+
+	genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED);
+	rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL);
+	if (rc != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] genwqe_user_vmap rc=%d\n", __func__, rc);
+		/* descriptor was never put on the pin list: do not leak it */
+		kfree(dma_map);
+		return rc;
+	}
+
+	genwqe_add_pin(cfile, dma_map);
+	return 0;
+}
+
+/**
+ * genwqe_unpin_mem() - Release a pinning of a user memory area
+ * @cfile:	descriptor of opened file
+ * @m:		user supplied address and size of the area
+ *
+ * The area is extended to full pages before the pin list of @cfile
+ * is searched. A found pinning is unlinked, unmapped and freed.
+ *
+ * Return: 0 if successful, -EINVAL if the address is 0, -ENOENT if
+ * no pinning covers the area
+ */
+static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+	struct dma_mapping *pin;
+	unsigned long first_page;
+	unsigned long span;
+
+	if (m->addr == 0x0)
+		return -EINVAL;
+
+	first_page = m->addr & PAGE_MASK;
+	span = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+	pin = genwqe_search_pin(cfile, first_page, span, NULL);
+	if (!pin)
+		return -ENOENT;
+
+	genwqe_del_pin(cfile, pin);
+	genwqe_user_vunmap(cfile->cd, pin, NULL);
+	kfree(pin);
+
+	return 0;
+}
+
+/**
+ * ddcb_cmd_cleanup() - Remove dynamically created fixup entries
+ * @cfile:	descriptor of opened file
+ * @req:	request whose fixups are to be removed
+ *
+ * Only if there are any. Pinnings are not removed. For each of the
+ * DDCB_FIXUPS slots a used temporary mapping is unlinked and
+ * unmapped, and an allocated scatter gather list is freed and its
+ * bookkeeping reset.
+ *
+ * Return: always 0
+ */
+static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+	struct genwqe_dev *cd = cfile->cd;
+	unsigned int slot;
+
+	for (slot = 0; slot < DDCB_FIXUPS; slot++) {
+		struct dma_mapping *tmp_map = &req->dma_mappings[slot];
+
+		if (dma_mapping_used(tmp_map)) {
+			__genwqe_del_mapping(cfile, tmp_map);
+			genwqe_user_vunmap(cd, tmp_map, req);
+		}
+
+		if (req->sgl[slot] == NULL)
+			continue;
+
+		genwqe_free_sgl(cd, req->sgl[slot],
+				req->sgl_dma_addr[slot],
+				req->sgl_size[slot]);
+		req->sgl[slot] = NULL;
+		req->sgl_dma_addr[slot] = 0x0;
+		req->sgl_size[slot] = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references
+ * @cfile:	descriptor of opened file
+ * @req:	request whose ASIV area is to be fixed up
+ *
+ * Before the DDCB gets executed we need to handle the fixups. We
+ * replace the user-space addresses with DMA addresses or do
+ * additional setup work e.g. generating a scatter-gather list which
+ * is used to describe the memory referred to in the fixup.
+ *
+ * The ASIV area is walked in 8 byte steps from offset 0x00 to 0x58;
+ * the ATS flags of each step select how the 64-bit value at that
+ * offset is treated. On any failure the fixups done so far are
+ * undone via ddcb_cmd_cleanup().
+ *
+ * Return: 0 if successful, -EFAULT if a flat buffer is not covered by
+ * a known mapping, -ENOMEM if no scatter gather list could be
+ * allocated, -EINVAL for unknown ATS flags, or the return code of
+ * genwqe_user_vmap()
+ */
+static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+	int rc;
+	unsigned int asiv_offs, i;
+	struct genwqe_dev *cd = cfile->cd;
+	struct genwqe_ddcb_cmd *cmd = &req->cmd;
+	struct dma_mapping *m;
+	/* NOTE(review): 'type' is assigned below but never read here */
+	const char *type = "UNKNOWN";
+
+	/* i indexes the per-request fixup slots, one per 8 byte step */
+	for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58;
+	     i++, asiv_offs += 0x08) {
+
+		u64 u_addr;
+		dma_addr_t d_addr;
+		u32 u_size = 0;
+		u64 ats_flags;
+
+		ats_flags = ATS_GET_FLAGS(cmd->ats, asiv_offs);
+
+		switch (ats_flags) {
+
+		case ATS_TYPE_DATA:
+			break;	/* nothing to do here */
+
+		case ATS_TYPE_FLAT_RDWR:
+		case ATS_TYPE_FLAT_RD: {
+			/* address at asiv_offs, 32-bit size 8 bytes behind */
+			u_addr = be64_to_cpu(*((__be64 *)&cmd->
+					       asiv[asiv_offs]));
+			u_size = be32_to_cpu(*((__be32 *)&cmd->
+					       asiv[asiv_offs + 0x08]));
+
+			/*
+			 * No data available. Ignore u_addr in this
+			 * case and set addr to 0. Hardware must not
+			 * fetch the buffer.
+			 */
+			if (u_size == 0x0) {
+				*((__be64 *)&cmd->asiv[asiv_offs]) =
+					cpu_to_be64(0x0);
+				break;
+			}
+
+			/* flat buffers must lie within a known mapping */
+			m = __genwqe_search_mapping(cfile, u_addr, u_size,
+						   &d_addr, NULL);
+			if (m == NULL) {
+				rc = -EFAULT;
+				goto err_out;
+			}
+
+			/* replace the user address by the DMA address */
+			*((__be64 *)&cmd->asiv[asiv_offs]) =
+				cpu_to_be64(d_addr);
+			break;
+		}
+
+		case ATS_TYPE_SGL_RDWR:
+		case ATS_TYPE_SGL_RD: {
+			int page_offs, nr_pages, offs;
+
+			/* address at asiv_offs, 32-bit size 8 bytes behind */
+			u_addr = be64_to_cpu(*((__be64 *)
+					       &cmd->asiv[asiv_offs]));
+			u_size = be32_to_cpu(*((__be32 *)
+					       &cmd->asiv[asiv_offs + 0x08]));
+
+			/*
+			 * No data available. Ignore u_addr in this
+			 * case and set addr to 0. Hardware must not
+			 * fetch the empty sgl.
+			 */
+			if (u_size == 0x0) {
+				*((__be64 *)&cmd->asiv[asiv_offs]) =
+					cpu_to_be64(0x0);
+				break;
+			}
+
+			/*
+			 * Use an existing pinning if one covers the
+			 * buffer, else map the user memory temporarily
+			 * using the mapping slot of this request.
+			 */
+			m = genwqe_search_pin(cfile, u_addr, u_size, NULL);
+			if (m != NULL) {
+				type = "PINNING";
+				page_offs = (u_addr -
+					     (u64)m->u_vaddr)/PAGE_SIZE;
+			} else {
+				type = "MAPPING";
+				m = &req->dma_mappings[i];
+
+				genwqe_mapping_init(m,
+						    GENWQE_MAPPING_SGL_TEMP);
+				rc = genwqe_user_vmap(cd, m, (void *)u_addr,
+						      u_size, req);
+				if (rc != 0)
+					goto err_out;
+
+				__genwqe_add_mapping(cfile, m);
+				page_offs = 0;
+			}
+
+			offs = offset_in_page(u_addr);
+			nr_pages = DIV_ROUND_UP(offs + u_size, PAGE_SIZE);
+
+			/* create genwqe style scatter gather list */
+			req->sgl[i] = genwqe_alloc_sgl(cd, m->nr_pages,
+						      &req->sgl_dma_addr[i],
+						      &req->sgl_size[i]);
+			if (req->sgl[i] == NULL) {
+				rc = -ENOMEM;
+				goto err_out;
+			}
+			genwqe_setup_sgl(cd, offs, u_size,
+					req->sgl[i],
+					req->sgl_dma_addr[i],
+					req->sgl_size[i],
+					m->dma_list,
+					page_offs,
+					nr_pages);
+
+			/* replace the user address by the sgl DMA address */
+			*((__be64 *)&cmd->asiv[asiv_offs]) =
+				cpu_to_be64(req->sgl_dma_addr[i]);
+
+			break;
+		}
+		default:
+			rc = -EINVAL;
+			goto err_out;
+		}
+	}
+	return 0;
+
+ err_out:
+	ddcb_cmd_cleanup(cfile, req);
+	return rc;
+}
+
+/**
+ * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups
+ * @cfile:	descriptor of opened file
+ * @cmd:	DDCB command, embedded in a struct ddcb_requ
+ *
+ * The code will build up the translation tables or lookup the
+ * contiguous memory allocation table to find the right translations
+ * and DMA addresses. The fixups are removed again after execution.
+ *
+ * Return: return code of ddcb_cmd_fixups() if that failed, else the
+ * return code of __genwqe_execute_raw_ddcb()
+ */
+static int genwqe_execute_ddcb(struct genwqe_file *cfile,
+			       struct genwqe_ddcb_cmd *cmd)
+{
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+	int rc;
+
+	rc = ddcb_cmd_fixups(cfile, req);
+	if (rc == 0) {
+		rc = __genwqe_execute_raw_ddcb(cfile->cd, cmd);
+		ddcb_cmd_cleanup(cfile, req);
+	}
+
+	return rc;
+}
+
+/**
+ * do_execute_ddcb() - Fetch a DDCB command from user-space and execute it
+ * @cfile:	descriptor of opened file
+ * @arg:	user-space address of a struct genwqe_ddcb_cmd
+ * @raw:	if not 0 execute the DDCB as is, else do address fixups
+ *
+ * After execution everything but the trailing DDCB_ASIV_LENGTH bytes
+ * of the command is copied back to user-space, regardless of the
+ * result of the execution.
+ *
+ * Return: -ENOMEM if no request could be allocated, -EFAULT if
+ * copying from or to user-space failed, else the return code of the
+ * execution
+ */
+static int do_execute_ddcb(struct genwqe_file *cfile,
+			   unsigned long arg, int raw)
+{
+	void __user *uptr = (void __user *)arg;
+	struct genwqe_ddcb_cmd *cmd;
+	int rc;
+
+	cmd = ddcb_requ_alloc();
+	if (!cmd)
+		return -ENOMEM;
+
+	if (copy_from_user(cmd, uptr, sizeof(*cmd))) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+
+	if (raw)
+		rc = __genwqe_execute_raw_ddcb(cfile->cd, cmd);
+	else
+		rc = genwqe_execute_ddcb(cfile, cmd);
+
+	/*
+	 * Copy back only the modified fields. Do not copy ASIV back
+	 * since the copy got modified by the driver.
+	 */
+	if (copy_to_user(uptr, cmd, sizeof(*cmd) - DDCB_ASIV_LENGTH))
+		rc = -EFAULT;
+
+ out_free:
+	ddcb_requ_free(cmd);
+	return rc;
+}
+
+/**
+ * genwqe_ioctl() - IO control
+ * @filp:       file handle
+ * @cmd:        command identifier (passed from user)
+ * @arg:        argument (passed from user)
+ *
+ * Commands whose type is not GENWQE_IOC_CODE and unknown commands are
+ * rejected with -EINVAL. Register writes and raw DDCB execution
+ * require CAP_SYS_ADMIN; flash access requires a privileged card.
+ * Register writes and flash updates are refused on files opened
+ * read-only.
+ *
+ * Return: 0 on success, -EFAULT if user memory could not be
+ * accessed, -EPERM on missing privileges, -EINVAL on invalid
+ * arguments, or the return code of the executed operation
+ */
+static long genwqe_ioctl(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
+{
+	int rc = 0;
+	struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+	struct genwqe_reg_io __user *io;
+	u64 val;
+	u32 reg_offs;
+
+	if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE)
+		return -EINVAL;
+
+	switch (cmd) {
+
+	case GENWQE_GET_CARD_STATE:
+		/* report a faulting user pointer instead of success */
+		return put_user(cd->card_state,
+				(enum genwqe_card_state __user *)arg);
+
+		/* Register access */
+	case GENWQE_READ_REG64: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		/* offset must be within the MMIO area and 8 byte aligned */
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+			return -EINVAL;
+
+		val = __genwqe_readq(cd, reg_offs);
+		return put_user(val, &io->val64);
+	}
+
+	case GENWQE_WRITE_REG64: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		/* offset must be within the MMIO area and 8 byte aligned */
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+			return -EINVAL;
+
+		if (get_user(val, &io->val64))
+			return -EFAULT;
+
+		__genwqe_writeq(cd, reg_offs, val);
+		return 0;
+	}
+
+	case GENWQE_READ_REG32: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		/* offset must be within the MMIO area and 4 byte aligned */
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+			return -EINVAL;
+
+		val = __genwqe_readl(cd, reg_offs);
+		return put_user(val, &io->val64);
+	}
+
+	case GENWQE_WRITE_REG32: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		/* offset must be within the MMIO area and 4 byte aligned */
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+			return -EINVAL;
+
+		if (get_user(val, &io->val64))
+			return -EFAULT;
+
+		__genwqe_writel(cd, reg_offs, val);
+		return 0;
+	}
+
+		/* Flash update/reading */
+	case GENWQE_SLU_UPDATE: {
+		struct genwqe_bitstream load;
+
+		if (!genwqe_is_privileged(cd))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (copy_from_user(&load, (void __user *)arg,
+				   sizeof(load)))
+			return -EFAULT;
+
+		rc = do_flash_update(cfile, &load);
+
+		/* hand the updated status fields back, even on failure */
+		if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+			return -EFAULT;
+
+		return rc;
+	}
+
+	case GENWQE_SLU_READ: {
+		struct genwqe_bitstream load;
+
+		if (!genwqe_is_privileged(cd))
+			return -EPERM;
+
+		if (genwqe_flash_readback_fails(cd))
+			return -ENOSPC;	 /* known to fail for old versions */
+
+		if (copy_from_user(&load, (void __user *)arg, sizeof(load)))
+			return -EFAULT;
+
+		rc = do_flash_read(cfile, &load);
+
+		/* hand the updated status fields back, even on failure */
+		if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+			return -EFAULT;
+
+		return rc;
+	}
+
+		/* memory pinning and unpinning */
+	case GENWQE_PIN_MEM: {
+		struct genwqe_mem m;
+
+		if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+			return -EFAULT;
+
+		return genwqe_pin_mem(cfile, &m);
+	}
+
+	case GENWQE_UNPIN_MEM: {
+		struct genwqe_mem m;
+
+		if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+			return -EFAULT;
+
+		return genwqe_unpin_mem(cfile, &m);
+	}
+
+		/* launch an DDCB and wait for completion */
+	case GENWQE_EXECUTE_DDCB:
+		return do_execute_ddcb(cfile, arg, 0);
+
+	case GENWQE_EXECUTE_RAW_DDCB: {
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		return do_execute_ddcb(cfile, arg, 1);
+	}
+
+	default:
+		return -EINVAL;
+	}
+
+	return rc;
+}
+
+#if defined(CONFIG_COMPAT)
+/**
+ * genwqe_compat_ioctl() - Compatibility ioctl
+ * @filp:        file pointer.
+ * @cmd:         command.
+ * @arg:         user argument.
+ *
+ * Called whenever a 32-bit process running under a 64-bit kernel
+ * performs an ioctl on /dev/genwqe<n>_card. The request is passed
+ * unchanged to genwqe_ioctl().
+ *
+ * Return:       zero on success or negative number on failure.
+ */
+static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd,
+				unsigned long arg)
+{
+	return genwqe_ioctl(filp, cmd, arg);
+}
+#endif /* defined(CONFIG_COMPAT) */
+
+static const struct file_operations genwqe_fops = {
+	.owner		= THIS_MODULE,
+	.open		= genwqe_open,
+	.release	= genwqe_release,
+	.mmap		= genwqe_mmap,
+	.fasync		= genwqe_fasync,
+	.unlocked_ioctl	= genwqe_ioctl,
+#if defined(CONFIG_COMPAT)
+	.compat_ioctl	= genwqe_compat_ioctl,
+#endif
+};
+
+static int genwqe_device_initialized(struct genwqe_dev *cd)
+{
+	return (cd->dev == NULL) ? 0 : 1;	/* set by genwqe_device_create */
+}
+
+/**
+ * genwqe_device_create() - Create and configure genwqe char device
+ * @cd:      genwqe device descriptor
+ *
+ * Must be called before any further genwqe character devices are
+ * created: it allocates the major/minor range the clients use.
+ * Return: 0 on success, else the error code of the failing step.
+ */
+int genwqe_device_create(struct genwqe_dev *cd)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/*
+	 * Here starts the individual setup per client. It must
+	 * initialize its own cdev data structure with its own fops.
+	 * The appropriate devnum needs to be created. The ranges must
+	 * not overlap.
+	 */
+	rc = alloc_chrdev_region(&cd->devnum_genwqe, 0,
+				 GENWQE_MAX_MINOR, GENWQE_DEVNAME);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n");
+		goto err_dev;
+	}
+
+	cdev_init(&cd->cdev_genwqe, &genwqe_fops);
+	cd->cdev_genwqe.owner = THIS_MODULE;
+
+	rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "err: cdev_add failed\n");
+		goto err_add;
+	}
+
+	/*
+	 * Finally the device in /dev/... must be created. Its name is
+	 * GENWQE_DEVNAME followed by the card index and "_card".
+	 */
+	cd->dev = device_create_with_groups(cd->class_genwqe,
+					    &cd->pci_dev->dev,
+					    cd->devnum_genwqe, cd,
+					    genwqe_attribute_groups,
+					    GENWQE_DEVNAME "%u_card",
+					    cd->card_idx);
+	if (IS_ERR(cd->dev)) {
+		rc = PTR_ERR(cd->dev);
+		goto err_cdev;
+	}
+
+	rc = genwqe_init_debugfs(cd);
+	if (rc != 0)
+		goto err_debugfs;
+
+	return 0;
+
+ err_debugfs:
+	device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+ err_cdev:
+	cdev_del(&cd->cdev_genwqe);
+ err_add:
+	unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+ err_dev:
+	cd->dev = NULL;
+	return rc;
+}
+
+/*
+ * Ask users of the device to go away: SIGIO first, then SIGKILL, each
+ * followed by up to genwqe_kill_timeout seconds of waiting. Returns 0.
+ */
+static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd)
+{
+	int rc;
+	unsigned int i;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_open_files(cd))
+		return 0;
+
+	dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__);
+	rc = genwqe_kill_fasync(cd, SIGIO);
+	if (rc > 0) {
+		/* give kill_timeout seconds to close file descriptors ... */
+		for (i = 0; (i < genwqe_kill_timeout) &&
+			     genwqe_open_files(cd); i++) {
+			dev_info(&pci_dev->dev, "  %u sec ...\n", i);
+			cond_resched();
+			msleep(1000);
+		}
+
+		/* if no open files we can safely continue, else ... */
+		if (!genwqe_open_files(cd))
+			return 0;
+
+		dev_warn(&pci_dev->dev,
+			 "[%s] send SIGKILL and wait ...\n", __func__);
+		rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */
+		if (rc) {
+			/* Give kill_timeout more seconds to end processes */
+			for (i = 0; (i < genwqe_kill_timeout) &&
+				     genwqe_open_files(cd); i++) {
+				dev_warn(&pci_dev->dev, "  %u sec ...\n", i);
+				cond_resched();
+				msleep(1000);
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_device_remove() - Remove genwqe's char device
+ * @cd:      genwqe device descriptor
+ *
+ * Must be called after the client devices are removed because it
+ * frees the major/minor number range for the genwqe drivers. Safe
+ * to call twice: a second call finds cd->dev == NULL and returns 1.
+ * Return: 0 if the device was removed, 1 if it was not initialized.
+ */
+int genwqe_device_remove(struct genwqe_dev *cd)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_device_initialized(cd))
+		return 1;
+
+	genwqe_inform_and_stop_processes(cd);
+
+	/*
+	 * We currently wait until all file descriptors are closed.
+	 * This leads to a problem when we abort the application,
+	 * which will decrease this reference from 1/unused to
+	 * 0/illegal and not from 2/used to 1/empty.
+	 */
+	rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount);
+	if (rc != 1) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc);
+		panic("Fatal err: cannot free resources with pending references!");
+	}
+
+	genqwe_exit_debugfs(cd);
+	device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+	cdev_del(&cd->cdev_genwqe);
+	unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+	cd->dev = NULL;
+
+	return 0;
+}
diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c
new file mode 100644
index 000000000000..a72a99266c3c
--- /dev/null
+++ b/drivers/misc/genwqe/card_sysfs.c
@@ -0,0 +1,288 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Sysfs interfaces for the GenWQE card. There are attributes to query
+ * the version of the bitstream as well as some for the driver. For
+ * debugging, please also see the debugfs interfaces of this driver.
+ */
+
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+static const char * const genwqe_types[] = {
+	[GENWQE_TYPE_ALTERA_230] = "GenWQE4-230",
+	[GENWQE_TYPE_ALTERA_530] = "GenWQE4-530",
+	[GENWQE_TYPE_ALTERA_A4]  = "GenWQE5-A4",
+	[GENWQE_TYPE_ALTERA_A7]  = "GenWQE5-A7",
+};	/* type_show() indexes this with genwqe_card_type() */
+
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	const char *txt[GENWQE_CARD_STATE_MAX] = { "unused", "used", "error" };
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", txt[cd->card_state]); /* state name */
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t appid_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	char name[5];	/* filled by genwqe_read_app_id() */
+
+	genwqe_read_app_id(cd, name, sizeof(name));
+	return sprintf(buf, "%s\n", name);
+}
+static DEVICE_ATTR_RO(appid);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	u64 slu_unitcfg, app_unitcfg;
+
+	/* read order: SLU register first, then the APP register */
+	slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+	return sprintf(buf, "%016llx.%016llx\n", slu_unitcfg, app_unitcfg);
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	u8 type = genwqe_card_type(cd);
+
+	if (type >= ARRAY_SIZE(genwqe_types))	/* no name for this type */
+		return sprintf(buf, "%s\n", "invalid");
+	return sprintf(buf, "%s\n", genwqe_types[type]);
+}
+static DEVICE_ATTR_RO(type);
+
+static ssize_t driver_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	return sprintf(buf, "%s\n", DRV_VERS_STRING); /* dev/attr unused */
+}
+static DEVICE_ATTR_RO(driver);
+
+static ssize_t tempsens_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	u64 raw = __genwqe_readq(cd, IO_SLU_TEMPERATURE_SENSOR);
+
+	/* the register content is shown as is, 16 hex digits */
+	return sprintf(buf, "%016llx\n", raw);
+}
+static DEVICE_ATTR_RO(tempsens);
+
+static ssize_t freerunning_timer_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	u64 raw = __genwqe_readq(cd, IO_SLC_FREE_RUNNING_TIMER);
+
+	/* the register content is shown as is, 16 hex digits */
+	return sprintf(buf, "%016llx\n", raw);
+}
+static DEVICE_ATTR_RO(freerunning_timer);
+
+static ssize_t queue_working_time_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	u64 raw = __genwqe_readq(cd, IO_SLC_QUEUE_WTIME);
+
+	/* the register content is shown as is, 16 hex digits */
+	return sprintf(buf, "%016llx\n", raw);
+}
+static DEVICE_ATTR_RO(queue_working_time);
+
+static ssize_t base_clock_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	u64 base_clock = genwqe_base_clock_frequency(cd);
+
+	/* base_clock is unsigned, so it is printed with %llu */
+	return sprintf(buf, "%llu\n", base_clock);
+}
+static DEVICE_ATTR_RO(base_clock);
+
+/**
+ * curr_bitstream_show() - Show the current bitstream id
+ *
+ * Some old versions of the CPLD which selects the bitstream have a
+ * bug that makes the IO_SLU_BITSTREAM register report unreliable
+ * data in very rare cases. This sysfs entry is therefore unreliable
+ * up to the point where a new CPLD version is being used.
+ *
+ * Unfortunately there is no automatic way yet to query the CPLD
+ * version, such that you need to manually ensure via programming
+ * tools that you have a recent version of the CPLD software.
+ *
+ * The proposed circumvention is to use a special recovery bitstream
+ * on the backup partition (0) to identify problems while loading the
+ * image.
+ */
+static ssize_t curr_bitstream_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	u64 reg = __genwqe_readq(cd, IO_SLU_BITSTREAM);
+
+	/* only bit 0 is reported */
+	return sprintf(buf, "%d\n", (int)(reg & 0x1));
+}
+static DEVICE_ATTR_RO(curr_bitstream);
+
+/**
+ * next_bitstream_show() - Show the next activated bitstream
+ *
+ * Decoded from bits 3:2 of the cached cd->softreset value:
+ * 0x2 is shown as partition 0, 0x3 as partition 1, anything
+ * else is reported as -1.
+ * IO_SLC_CFGREG_SOFTRESET: This register can only be accessed by the PF.
+ */
+static ssize_t next_bitstream_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	unsigned int sel = (cd->softreset & 0xc) >> 2;
+	int partition;
+
+	if (sel == 0x2)
+		partition = 0;
+	else if (sel == 0x3)
+		partition = 1;
+	else
+		partition = -1;		/* error */
+
+	return sprintf(buf, "%d\n", partition);
+}
+
+/**
+ * next_bitstream_store() - Select the bitstream to activate next
+ *
+ * The input is parsed with kstrtoint(); only the values 0 and 1 are
+ * accepted, anything else yields -EINVAL. The selection is cached in
+ * cd->softreset and written to IO_SLC_CFGREG_SOFTRESET.
+ */
+static ssize_t next_bitstream_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	int partition;
+
+	if (kstrtoint(buf, 0, &partition) < 0)
+		return -EINVAL;
+	if ((partition != 0x0) && (partition != 0x1))
+		return -EINVAL;
+
+	/* 0x78 stands for partition 0, 0x7c for partition 1 */
+	cd->softreset = (partition == 0x0) ? 0x78 : 0x7c;
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+	return count;
+}
+static DEVICE_ATTR_RW(next_bitstream);
+
+/*
+ * genwqe_attributes lists all sysfs entries of the card. The entries
+ * repeated in genwqe_normal_attributes stay visible without privilege.
+ */
+static struct attribute *genwqe_attributes[] = {
+	&dev_attr_tempsens.attr,
+	&dev_attr_next_bitstream.attr,
+	&dev_attr_curr_bitstream.attr,
+	&dev_attr_base_clock.attr,
+	&dev_attr_driver.attr,
+	&dev_attr_type.attr,
+	&dev_attr_version.attr,
+	&dev_attr_appid.attr,
+	&dev_attr_status.attr,
+	&dev_attr_freerunning_timer.attr,
+	&dev_attr_queue_working_time.attr,
+	NULL,
+};
+
+static struct attribute *genwqe_normal_attributes[] = {
+	&dev_attr_driver.attr,
+	&dev_attr_type.attr,
+	&dev_attr_version.attr,
+	&dev_attr_appid.attr,
+	&dev_attr_status.attr,
+	&dev_attr_freerunning_timer.attr,
+	&dev_attr_queue_working_time.attr,
+	NULL,
+};
+
+/**
+ * genwqe_is_visible() - Determine if sysfs attribute should be visible or not
+ *
+ * VFs have restricted mmio capabilities, so unless the device is
+ * privileged only the entries of genwqe_normal_attributes are shown.
+ */
+static umode_t genwqe_is_visible(struct kobject *kobj,
+				 struct attribute *attr, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	struct attribute **a;
+
+	if (genwqe_is_privileged(cd))
+		return attr->mode;
+
+	for (a = genwqe_normal_attributes; *a != NULL; a++) {
+		if (*a == attr)
+			return attr->mode;
+	}
+
+	return 0;
+}
+
+static struct attribute_group genwqe_attribute_group = {
+	.attrs      = genwqe_attributes,
+	.is_visible = genwqe_is_visible,
+};
+
+const struct attribute_group *genwqe_attribute_groups[] = {
+	&genwqe_attribute_group,
+	NULL,
+};
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
new file mode 100644
index 000000000000..6b1a6ef9f1a8
--- /dev/null
+++ b/drivers/misc/genwqe/card_utils.c
@@ -0,0 +1,944 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Miscellaneous functionality used in the other GenWQE driver parts.
+ */
+
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/page-flags.h>
+#include <linux/scatterlist.h>
+#include <linux/hugetlb.h>
+#include <linux/iommu.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/**
+ * __genwqe_writeq() - Write 64-bit register
+ * @cd:	        genwqe device descriptor
+ * @byte_offs:  byte offset within BAR
+ * @val:        64-bit value
+ *
+ * Converts @val with cpu_to_be64() before writing. Nothing is
+ * written while a hardware failure is injected or cd->mmio is NULL.
+ *
+ * Return: 0 if success; -EIO if nothing was written
+ */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
+{
+	if ((cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) ||
+	    (cd->mmio == NULL))
+		return -EIO;
+
+	__raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
+	return 0;
+}
+
+/**
+ * __genwqe_readq() - Read 64-bit register
+ * @cd:         genwqe device descriptor
+ * @byte_offs:  offset within BAR
+ *
+ * Return: register value, or an injected/all-ones value instead
+ */
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
+{
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return 0xffffffffffffffffull;
+
+	/* injected GFIR values are returned instead of the register */
+	if (byte_offs == IO_SLC_CFGREG_GFIR) {
+		if (cd->err_inject & GENWQE_INJECT_GFIR_FATAL)
+			return 0x000000000000ffffull;
+		if (cd->err_inject & GENWQE_INJECT_GFIR_INFO)
+			return 0x00000000ffff0000ull;
+	}
+
+	if (cd->mmio == NULL)
+		return 0xffffffffffffffffull;
+
+	return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
+}
+
+/**
+ * __genwqe_writel() - Write 32-bit register
+ * @cd:	        genwqe device descriptor
+ * @byte_offs:  byte offset within BAR
+ * @val:        32-bit value
+ *
+ * Converts @val with cpu_to_be32() before writing. Nothing is
+ * written while a hardware failure is injected or cd->mmio is NULL.
+ *
+ * Return: 0 if success; -EIO if nothing was written
+ */
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
+{
+	if ((cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) ||
+	    (cd->mmio == NULL))
+		return -EIO;
+
+	__raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
+	return 0;
+}
+
+/**
+ * __genwqe_readl() - Read 32-bit register
+ * @cd:         genwqe device descriptor
+ * @byte_offs:  offset within BAR
+ *
+ * The raw value is converted with be32_to_cpu().
+ *
+ * Return: Value from register; 0xffffffff while a hardware failure
+ *         is injected or cd->mmio is NULL
+ */
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
+{
+	if ((cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) ||
+	    (cd->mmio == NULL))
+		return 0xffffffff;
+
+	return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
+}
+
+/**
+ * genwqe_read_app_id() - Extract app_id
+ *
+ * Uses the low 32 bits of cd->app_unitcfg, which must be valid first
+ */
+int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
+{
+	u32 app_id = (u32)cd->app_unitcfg;
+	int out = 0, pos, limit = min(len, 4);
+
+	memset(app_name, 0, len);
+	for (pos = 0; pos < limit; pos++) {
+		char ch = (char)((app_id >> (24 - pos * 8)) & 0xff);
+
+		if (ch != ' ')	/* blanks are skipped */
+			app_name[out++] = isprint(ch) ? ch : 'X';
+	}
+	return out;
+}
+
+/**
+ * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
+ *
+ * Existing kernel functions seem to use a different polynomial,
+ * therefore we could not use them here.
+ *
+ * Genwqe's Polynomial = 0x20044009
+ */
+#define CRC32_POLYNOMIAL	0x20044009
+static u32 crc32_tab[256];	/* crc32 lookup table */
+
+void genwqe_init_crc32(void)
+{
+	unsigned int i, j;	/* unsigned: i << 24 must not overflow int */
+	u32 crc;
+
+	for (i = 0;  i < 256;  i++) {
+		crc = i << 24;
+		for (j = 0;  j < 8;  j++) {
+			if (crc & 0x80000000)
+				crc = (crc << 1) ^ CRC32_POLYNOMIAL;
+			else
+				crc = (crc << 1);
+		}
+		crc32_tab[i] = crc;
+	}
+}
+
+/**
+ * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
+ * @buff:       pointer to data buffer
+ * @len:        length of data for calculation
+ * @init:       initial crc (0xffffffff at start)
+ *
+ * polynomial = x^32 + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
+ *
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
+ * result in a crc32 of 0xf33cb7d3.
+ *
+ * The existing kernel crc functions did not cover this polynomial yet.
+ *
+ * Return: crc32 checksum.
+ */
+u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
+{
+	u32 crc = init;
+	size_t pos;
+
+	for (pos = 0; pos < len; pos++) {
+		u8 idx = (crc >> 24) ^ buff[pos];	/* table index */
+
+		crc = (crc << 8) ^ crc32_tab[idx];
+	}
+	return crc;
+}
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+			       dma_addr_t *dma_handle)
+{
+	/* Orders >= MAX_ORDER cannot be served by the page allocator. */
+	if (get_order(size) >= MAX_ORDER)
+		return NULL;
+	return pci_alloc_consistent(cd->pci_dev, size, dma_handle);
+}
+
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+			     void *vaddr, dma_addr_t dma_handle)
+{
+	/* a NULL vaddr means there is nothing to release */
+	if (vaddr != NULL)
+		pci_free_consistent(cd->pci_dev, size,
+				    vaddr, dma_handle);
+}
+
+static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
+			      int num_pages)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	int n = 0;
+
+	/* stop at the first zero entry or after num_pages entries */
+	while ((n < num_pages) && (dma_list[n] != 0x0)) {
+		pci_unmap_page(pci_dev, dma_list[n],
+			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		dma_list[n++] = 0x0;
+	}
+}
+
+static int genwqe_map_pages(struct genwqe_dev *cd,
+			   struct page **page_list, int num_pages,
+			   dma_addr_t *dma_list)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	dma_addr_t daddr;
+	int i;
+
+	/*
+	 * Map page by page. A slot stays 0x0 until its mapping is
+	 * known to be good; on failure everything is unmapped again.
+	 */
+	for (i = 0; i < num_pages; i++) {
+		dma_list[i] = 0x0;
+		daddr = pci_map_page(pci_dev, page_list[i],
+				     0,	 /* map_offs */
+				     PAGE_SIZE,
+				     PCI_DMA_BIDIRECTIONAL);  /* FIXME rd/rw */
+		if (!pci_dma_mapping_error(pci_dev, daddr)) {
+			dma_list[i] = daddr;
+			continue;
+		}
+
+		dev_err(&pci_dev->dev,
+			"[%s] err: no dma addr daddr=%016llx!\n",
+			__func__, (long long)daddr);
+		genwqe_unmap_pages(cd, dma_list, num_pages);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int genwqe_sgl_size(int num_pages)
+{
+	int entries = num_pages + (num_pages / 7) + 1;
+	int len = sizeof(struct sg_entry) * entries;
+
+	return roundup(len, PAGE_SIZE);
+}
+
+/* Allocate consistent memory for an sgl; its size goes to *sgl_size. */
+struct sg_entry *genwqe_alloc_sgl(struct genwqe_dev *cd, int num_pages,
+				  dma_addr_t *dma_addr, size_t *sgl_size)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct sg_entry *sgl;
+
+	*sgl_size = genwqe_sgl_size(num_pages);
+	/* the page allocator only serves orders below MAX_ORDER */
+	if (get_order(*sgl_size) >= MAX_ORDER) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: too much memory requested!\n", __func__);
+		return NULL;
+	}
+
+	sgl = __genwqe_alloc_consistent(cd, *sgl_size, dma_addr);
+	if (sgl == NULL)
+		dev_err(&pci_dev->dev,
+			"[%s] err: no memory available!\n", __func__);
+
+	return sgl;
+}
+
+int genwqe_setup_sgl(struct genwqe_dev *cd,
+		     unsigned long offs,
+		     unsigned long size,
+		     struct sg_entry *sgl,
+		     dma_addr_t dma_addr, size_t sgl_size,
+		     dma_addr_t *dma_list, int page_offs, int num_pages)
+{
+	int i = 0, j = 0, p;
+	unsigned long dma_offs, map_offs;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	dma_addr_t prev_daddr = 0;
+	struct sg_entry *s, *last_s = NULL;
+
+	/* sanity checks; NOTE(review): offs == PAGE_SIZE passes, confirm */
+	if (offs > PAGE_SIZE) {
+		dev_err(&pci_dev->dev,
+			"[%s] too large start offs %08lx\n", __func__, offs);
+		return -EFAULT;
+	}
+	if (sgl_size < genwqe_sgl_size(num_pages)) {
+		dev_err(&pci_dev->dev,
+			"[%s] sgl_size too small %08lx for %d pages\n",
+			__func__, sgl_size, num_pages);
+		return -EFAULT;
+	}
+
+	dma_offs = 128;		/* next block if needed/dma_offset */
+	map_offs = offs;	/* offset in first page */
+
+	s = &sgl[0];		/* first set of 8 entries */
+	p = 0;			/* page */
+	while (p < num_pages) {
+		dma_addr_t daddr;
+		unsigned int size_to_map;
+
+		/* always write the chaining entry, cleanup is done later */
+		j = 0;
+		s[j].target_addr = cpu_to_be64(dma_addr + dma_offs);
+		s[j].len	 = cpu_to_be32(128);
+		s[j].flags	 = cpu_to_be32(SG_CHAINED);
+		j++;
+
+		while (j < 8) {
+			/* DMA mapping for requested page, offs, size */
+			size_to_map = min(size, PAGE_SIZE - map_offs);
+			daddr = dma_list[page_offs + p] + map_offs;
+			size -= size_to_map;
+			map_offs = 0;
+
+			if (prev_daddr == daddr) {
+				u32 prev_len = be32_to_cpu(last_s->len);
+
+				/* pr_info("daddr combining: "
+					"%016llx/%08x -> %016llx\n",
+					prev_daddr, prev_len, daddr); */
+
+				last_s->len = cpu_to_be32(prev_len +
+							  size_to_map);
+
+				p++; /* process next page */
+				if (p == num_pages)
+					goto fixup;  /* nothing to do */
+
+				prev_daddr = daddr + size_to_map;
+				continue;
+			}
+
+			/* start new entry */
+			s[j].target_addr = cpu_to_be64(daddr);
+			s[j].len	 = cpu_to_be32(size_to_map);
+			s[j].flags	 = cpu_to_be32(SG_DATA);
+			prev_daddr = daddr + size_to_map;
+			last_s = &s[j];
+			j++;
+
+			p++;	/* process next page */
+			if (p == num_pages)
+				goto fixup;  /* nothing to do */
+		}
+		dma_offs += 128;
+		s += 8;		/* continue 8 elements further */
+	}
+ fixup:
+	if (j == 1) {		/* combining happened on last entry! */
+		s -= 8;		/* full shift needed on previous sgl block */
+		j =  7;		/* shift all elements */
+	}
+
+	for (i = 0; i < j; i++)	/* move elements 1 up */
+		s[i] = s[i + 1];
+	/* the slot after the moved entries terminates the list */
+	s[i].target_addr = cpu_to_be64(0);
+	s[i].len	 = cpu_to_be32(0);
+	s[i].flags	 = cpu_to_be32(SG_END_LIST);
+	return 0;
+}
+
+void genwqe_free_sgl(struct genwqe_dev *cd, struct sg_entry *sg_list,
+		    dma_addr_t dma_addr, size_t size)
+{
+	__genwqe_free_consistent(cd, size, sg_list, dma_addr); /* NULL ok */
+}
+
+/**
+ * free_user_pages() - Give pinned pages back
+ *
+ * NULL entries in @page_list are skipped; always returns 0.
+ * From the get_user_pages documentation in mm/memory.c: if the page
+ * is written to, set_page_dirty (or set_page_dirty_lock, as
+ * appropriate) must be called after the page is finished with, and
+ * before put_page is called.
+ *
+ * FIXME Could be of use to others and might belong in the generic
+ * code, if others agree. E.g.
+ *    ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c
+ *    ceph_put_page_vector in net/ceph/pagevec.c
+ *    maybe more?
+ */
+static int free_user_pages(struct page **page_list, unsigned int nr_pages,
+			   int dirty)
+{
+	unsigned int n;
+
+	for (n = 0; n < nr_pages; n++) {
+		if (page_list[n] == NULL)
+			continue;
+		if (dirty)
+			set_page_dirty_lock(page_list[n]);
+		put_page(page_list[n]);
+	}
+	return 0;
+}
+
+/**
+ * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
+ * @cd:         pointer to genwqe device
+ * @m:          mapping params
+ * @uaddr:      user virtual address
+ * @size:       size of memory to be mapped
+ * @req:        not used by this function
+ *
+ * Pins the user pages and creates a DMA mapping for each of them.
+ * Doing this over and over again is slow; the time is most probably
+ * spent in the memory allocation functions, but maybe also in the
+ * DMA mapping code.
+ *
+ * Restrictions: The maximum size of the possible mapping currently depends
+ *               on the amount of memory we can get using kcalloc() for the
+ *               page_list and pci_alloc_consistent for the sg_list.
+ *               The sg_list is currently itself not scattered, which could
+ *               be fixed with some effort.
+ *
+ * Return: 0 if success; < 0 if error, @m is then marked unused
+ */
+int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
+		     unsigned long size, struct ddcb_requ *req)
+{
+	int rc = -EINVAL;
+	unsigned long data, offs;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if ((uaddr == NULL) || (size == 0)) {
+		m->size = 0;	/* mark unused and not added */
+		return -EINVAL;
+	}
+	m->u_vaddr = uaddr;
+	m->size    = size;
+
+	/* determine space needed for page_list. */
+	data = (unsigned long)uaddr;
+	offs = offset_in_page(data);
+	m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);
+
+	m->page_list = kcalloc(m->nr_pages,
+			       sizeof(struct page *) + sizeof(dma_addr_t),
+			       GFP_KERNEL);
+	if (!m->page_list) {
+		dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
+		m->nr_pages = 0;
+		m->u_vaddr = NULL;
+		m->size = 0;	/* mark unused and not added */
+		return -ENOMEM;
+	}
+	m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);
+
+	/* pin user pages in memory */
+	rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
+				 m->nr_pages,
+				 1,		/* write by caller */
+				 m->page_list);	/* ptrs to pages */
+	if (rc < 0)		/* nothing pinned, keep the error code */
+		goto fail_get_user_pages;
+
+	/* assumption: get_user_pages can be killed by signals. */
+	if (rc < m->nr_pages) {
+		free_user_pages(m->page_list, rc, 0);
+		rc = -EFAULT;
+		goto fail_get_user_pages;
+	}
+
+	rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
+	if (rc != 0)
+		goto fail_free_user_pages;
+
+	return 0;
+
+ fail_free_user_pages:
+	free_user_pages(m->page_list, m->nr_pages, 0);
+
+ fail_get_user_pages:
+	kfree(m->page_list);
+	m->page_list = NULL;
+	m->dma_list = NULL;
+	m->nr_pages = 0;
+	m->u_vaddr = NULL;
+	m->size = 0;		/* mark unused and not added */
+	return rc;
+}
+
+/**
+ * genwqe_user_vunmap() - Undo mapping of user-space memory
+ * @cd:         pointer to genwqe device
+ * @m:          mapping params
+ * @req:        not used by this function
+ * Return: 0 if success; -EINVAL if @m is not in use
+ */
+int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
+		       struct ddcb_requ *req)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!dma_mapping_used(m)) {
+		dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
+			__func__, m);
+		return -EINVAL;
+	}
+
+	if (m->dma_list)
+		genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);
+
+	if (m->page_list) {
+		free_user_pages(m->page_list, m->nr_pages, 1); /* 1: dirty */
+		kfree(m->page_list);
+		m->page_list = NULL;
+		m->dma_list = NULL;
+		m->nr_pages = 0;
+	}
+
+	m->u_vaddr = NULL;
+	m->size = 0;		/* mark as unused and not added */
+	return 0;
+}
+
+/**
+ * genwqe_card_type() - Get chip type SLU Configuration Register
+ * @cd:         pointer to the genwqe device descriptor
+ * Return: 0: Altera Stratix-IV 230
+ *         1: Altera Stratix-IV 530
+ *         2: Altera Stratix-V A4
+ *         3: Altera Stratix-V A7
+ */
+u8 genwqe_card_type(struct genwqe_dev *cd)
+{
+	/* masked type field of cd->slu_unitcfg, shifted down by 20 bits */
+	return (u8)((cd->slu_unitcfg & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
+}
+
+/**
+ * genwqe_card_reset() - Reset the card
+ * @cd:         pointer to the genwqe device descriptor
+ * Return: 0 if success; -ENODEV if the device is not privileged
+ */
+int genwqe_card_reset(struct genwqe_dev *cd)
+{
+	u64 softrst;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_is_privileged(cd))
+		return -ENODEV;
+
+	/* new SL */
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
+	msleep(1000);
+	__genwqe_readq(cd, IO_HSU_FIR_CLR);
+	__genwqe_readq(cd, IO_APP_FIR_CLR);
+	__genwqe_readq(cd, IO_SLU_FIR_CLR);
+
+	/*
+	 * Read-modify-write to preserve the stealth bits
+	 * For SL >= 039, Stealth WE bit allows removing
+	 * the read-modify-write.
+	 * r-m-w may require a mask 0x3C to avoid hitting hard
+	 * reset again for error reset (should be 0, chicken).
+	 */
+	softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);
+
+	/* give ERRORRESET some time to finish */
+	msleep(50);
+
+	if (genwqe_need_err_masking(cd)) {
+		dev_info(&pci_dev->dev,
+			 "[%s] masking errors for old bitstreams\n", __func__);
+		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+	}
+	return 0;
+}
+
+int genwqe_read_softreset(struct genwqe_dev *cd)
+{
+	if (!genwqe_is_privileged(cd))
+		return -ENODEV;
+
+	/* bit 0 of IO_SLU_BITSTREAM selects the cached softreset value */
+	cd->softreset = (__genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1) ?
+		0xcull : 0x8ull;
+
+	return 0;
+}
+
+/**
+ * genwqe_set_interrupt_capability() - Configure MSI capability structure
+ * @cd:         pointer to the device
+ * @count:      number of MSIs passed to pci_enable_msi_block()
+ * Return: 0 if no error, else the result of pci_enable_msi_block()
+ */
+int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
+{
+	int rc = pci_enable_msi_block(cd->pci_dev, count);
+
+	if (rc != 0)
+		return rc;
+	cd->flags |= GENWQE_FLAG_MSI_ENABLED;
+	return 0;
+}
+
+/**
+ * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
+ * @cd:         pointer to the device
+ */
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
+{
+	/* nothing to undo unless the MSI-enabled flag is set */
+	if (!(cd->flags & GENWQE_FLAG_MSI_ENABLED))
+		return;
+
+	pci_disable_msi(cd->pci_dev);
+	cd->flags &= ~GENWQE_FLAG_MSI_ENABLED;
+}
+
+/**
+ * set_reg_idx() - Fill array with data. Ignore illegal offsets.
+ * @cd:         card device
+ * @r:          debug register array
+ * @i:          index to desired entry, incremented after a store
+ * @m:          maximum possible entries
+ * @addr:       addr which is read
+ * @idx:        index in debug array
+ * @val:        read value
+ */
+static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
+		       unsigned int *i, unsigned int m, u32 addr, u32 idx,
+		       u64 val)
+{
+	if (WARN_ON_ONCE(*i >= m))
+		return -EFAULT;
+	/* room left: store the triple and advance the caller's index */
+	r[*i].addr = addr;
+	r[*i].idx = idx;
+	r[*i].val = val;
+	++*i;
+	return 0;
+}
+
+static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
+		   unsigned int *i, unsigned int m, u32 addr, u64 val)
+{
+	return set_reg_idx(cd, r, i, m, addr, 0, val);	/* idx fixed to 0 */
+}
+
+int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+			 unsigned int max_regs, int all)
+{
+	unsigned int i, j, idx = 0;
+	u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
+	u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;
+
+	/* Global FIR; set_reg() stores nothing once max_regs is reached */
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);
+
+	/* UnitCfg for SLU */
+	sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
+	set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);
+
+	/* UnitCfg for APP */
+	appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
+	set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);
+
+	/* Check all chip Units */
+	for (i = 0; i < GENWQE_MAX_UNITS; i++) {
+
+		/* Unit FIR */
+		ufir_addr = (i << 24) | 0x008;
+		ufir = __genwqe_readq(cd, ufir_addr);
+		set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);
+
+		/* Unit FEC */
+		ufec_addr = (i << 24) | 0x018;
+		ufec = __genwqe_readq(cd, ufec_addr);
+		set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);
+
+		for (j = 0; j < 64; j++) {
+			/* wherever there is a primary 1, read the 2ndary */
+			if (!all && (!(ufir & (1ull << j))))
+				continue;
+
+			sfir_addr = (i << 24) | (0x100 + 8 * j);
+			sfir = __genwqe_readq(cd, sfir_addr);
+			set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);
+
+			sfec_addr = (i << 24) | (0x300 + 8 * j);
+			sfec = __genwqe_readq(cd, sfec_addr);
+			set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
+		}
+	}
+
+	/* fill with invalid data until end; .idx is left untouched */
+	for (i = idx; i < max_regs; i++) {
+		regs[i].addr = 0xffffffff;
+		regs[i].val = 0xffffffffffffffffull;
+	}
+	return idx;	/* number of entries stored by set_reg() */
+}
+
+/**
+ * genwqe_ffdc_buff_size() - Calculates the number of dump registers
+ * @cd:         genwqe device whose registers are read
+ * @uid:        unit id; GENWQE_UID_OFFS(uid) is or'ed into each address
+ *
+ * Return: number of entries the extended error list and diag rings describe
+ */
+int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
+{
+	int entries = 0, ring, traps, traces, trace_entries;
+	u32 eevptr_addr, l_addr, d_len, d_type;
+	u64 eevptr, val, addr;
+
+	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+	eevptr = __genwqe_readq(cd, eevptr_addr);
+
+	if ((eevptr != 0x0) && (eevptr != -1ull)) {
+		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+
+		while (1) {
+			val = __genwqe_readq(cd, l_addr);
+
+			if ((val == 0x0) || (val == -1ull))
+				break;
+
+			d_len = (val & 0x0000007fff000000ull) >> 24; /* 38:24 */
+			d_type = (val & 0x0000008000000000ull) >> 39; /* 39 */
+
+			if (d_type)	/* repeat */
+				entries += d_len;
+			else		/* size in bytes! */
+				entries += d_len >> 3;
+
+			l_addr += 8;
+		}
+	}
+
+	for (ring = 0; ring < 8; ring++) {
+		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+		val = __genwqe_readq(cd, addr);
+
+		if ((val == 0x0ull) || (val == -1ull))
+			continue;
+
+		traps = (val >> 24) & 0xff;
+		traces = (val >> 16) & 0xff;
+		trace_entries = val & 0xffff;
+
+		entries += traps + (traces * trace_entries);
+	}
+	return entries;
+}
+
+/**
+ * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
+ *
+ * Return: always 0; values beyond @max_regs entries are not stored in @regs
+ */
+int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
+			  struct genwqe_reg *regs, unsigned int max_regs)
+{
+	int i, traps, traces, trace, trace_entries, trace_entry, ring;
+	unsigned int idx = 0;
+	u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
+	u64 eevptr, e, val, addr;
+
+	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+	eevptr = __genwqe_readq(cd, eevptr_addr);
+
+	if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
+		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+		while (1) {
+			e = __genwqe_readq(cd, l_addr);
+			if ((e == 0x0) || (e == 0xffffffffffffffffull))
+				break;
+
+			d_addr = (e & 0x0000000000ffffffull);	    /* 23:0 */
+			d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
+			d_type = (e & 0x0000008000000000ull) >> 39; /* 39 */
+			d_addr |= GENWQE_UID_OFFS(uid);
+
+			if (d_type) {
+				for (i = 0; i < (int)d_len; i++) {
+					val = __genwqe_readq(cd, d_addr);
+					set_reg_idx(cd, regs, &idx, max_regs,
+						    d_addr, i, val);
+				}
+			} else {
+				d_len >>= 3; /* Size in bytes! */
+				for (i = 0; i < (int)d_len; i++, d_addr += 8) {
+					val = __genwqe_readq(cd, d_addr);
+					set_reg_idx(cd, regs, &idx, max_regs,
+						    d_addr, 0, val);
+				}
+			}
+			l_addr += 8;
+		}
+	}
+
+	/*
+	 * To save time, there are only 6 traces populated on Uid=2,
+	 * Ring=1. each with iters=512.
+	 */
+	for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
+					      2...7 are ASI rings */
+		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+		val = __genwqe_readq(cd, addr);
+
+		if ((val == 0x0ull) || (val == -1ull))
+			continue;
+
+		traps = (val >> 24) & 0xff;	/* Number of Traps	*/
+		traces = (val >> 16) & 0xff;	/* Number of Traces	*/
+		trace_entries = val & 0xffff;	/* Entries per trace	*/
+
+		/* Note: combined loop, trace == 0 dumps the traps and */
+		/* trace == 1..'traces' dumps the traces.              */
+		for (trace = 0; trace <= traces; trace++) {
+			u32 diag_sel =
+				GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);
+
+			addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_SELECTOR;
+			__genwqe_writeq(cd, addr, diag_sel);
+
+			for (trace_entry = 0;
+			     trace_entry < (trace ? trace_entries : traps);
+			     trace_entry++) {
+				addr = (GENWQE_UID_OFFS(uid) |
+					IO_EXTENDED_DIAG_READ_MBX);
+				val = __genwqe_readq(cd, addr);
+				set_reg_idx(cd, regs, &idx, max_regs, addr,
+					    (diag_sel<<16) | trace_entry, val);
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_write_vreg() - Write register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
+{
+	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+	__genwqe_writeq(cd, reg, val);
+	return 0;
+}
+
+/**
+ * genwqe_read_vreg() - Read register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
+{
+	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+	return __genwqe_readq(cd, reg);
+}
+
+/**
+ * genwqe_base_clock_frequency() - Determine base clock frequency of the card
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specify the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ *
+ * Return: Card clock in MHz, or 0 if the speed code is not known
+ */
+int genwqe_base_clock_frequency(struct genwqe_dev *cd)
+{
+	u16 speed;		/*         MHz  MHz  MHz  MHz */
+	static const int speed_grade[] = { 250, 200, 166, 175 };
+
+	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+	if (speed >= ARRAY_SIZE(speed_grade))
+		return 0;	/* illegal value */
+
+	return speed_grade[speed];
+}
+
+/**
+ * genwqe_stop_traps() - Stop traps
+ *
+ * Before reading out the analysis data, we need to stop the traps.
+ */
+void genwqe_stop_traps(struct genwqe_dev *cd)
+{
+	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
+}
+
+/**
+ * genwqe_start_traps() - Start traps
+ *
+ * After having read the data, we can/must enable the traps again.
+ */
+void genwqe_start_traps(struct genwqe_dev *cd)
+{
+	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);
+
+	if (genwqe_need_err_masking(cd))
+		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+}
diff --git a/drivers/misc/genwqe/genwqe_driver.h b/drivers/misc/genwqe/genwqe_driver.h
new file mode 100644
index 000000000000..46e916b36c70
--- /dev/null
+++ b/drivers/misc/genwqe/genwqe_driver.h
@@ -0,0 +1,77 @@
+#ifndef __GENWQE_DRIVER_H__
+#define __GENWQE_DRIVER_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
+#include <linux/scatterlist.h>
+#include <linux/iommu.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+
+#include <asm/byteorder.h>
+#include <linux/genwqe/genwqe_card.h>
+
+#define DRV_VERS_STRING		"2.0.0"
+
+/*
+ * Static minor number assignment, until we decide/implement
+ * something dynamic.
+ */
+#define GENWQE_MAX_MINOR	128 /* up to 128 possible genwqe devices */
+
+/**
+ * ddcb_requ_alloc() - Allocate a new DDCB execution request
+ *
+ * This data structure contains the user visible fields of the DDCB
+ * to be executed.
+ *
+ * Return: ptr to genwqe_ddcb_cmd data structure
+ */
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void);
+
+/**
+ * ddcb_requ_free() - Free DDCB execution request.
+ * @req:       ptr to genwqe_ddcb_cmd data structure.
+ */
+void ddcb_requ_free(struct genwqe_ddcb_cmd *req);
+
+u32  genwqe_crc32(u8 *buff, size_t len, u32 init);
+
+static inline void genwqe_hexdump(struct pci_dev *pci_dev,
+				  const void *buff, unsigned int size)
+{
+	char prefix[32];
+
+	scnprintf(prefix, sizeof(prefix), "%s %s: ",
+		  GENWQE_DEVNAME, pci_name(pci_dev));
+
+	print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, 16, 1, buff,
+			     size, true);
+}
+
+#endif	/* __GENWQE_DRIVER_H__ */
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index a2edb2ee0921..49c7a23f02fc 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -224,7 +224,7 @@ static int jp_scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 }
 
 #ifdef CONFIG_IDE
-int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file,
+static int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file,
 			struct block_device *bdev, unsigned int cmd,
 			unsigned long arg)
 {
@@ -334,9 +334,10 @@ static void execute_location(void *dst)
 
 static void execute_user_location(void *dst)
 {
+	/* Intentionally crossing kernel/user memory boundary. */
 	void (*func)(void) = dst;
 
-	if (copy_to_user(dst, do_nothing, EXEC_SIZE))
+	if (copy_to_user((void __user *)dst, do_nothing, EXEC_SIZE))
 		return;
 	func();
 }
@@ -408,6 +409,8 @@ static void lkdtm_do_action(enum ctype which)
 	case CT_SPINLOCKUP:
 		/* Must be called twice to trigger. */
 		spin_lock(&lock_me_up);
+		/* Let sparse know we intended to exit holding the lock. */
+		__release(&lock_me_up);
 		break;
 	case CT_HUNG_TASK:
 		set_current_state(TASK_UNINTERRUPTIBLE);
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index d22c6864508b..2fad84432829 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -177,7 +177,7 @@ int mei_amthif_read(struct mei_device *dev, struct file *file,
 	unsigned long timeout;
 	int i;
 
-	/* Only Posible if we are in timeout */
+	/* Only possible if we are in timeout */
 	if (!cl || cl != &dev->iamthif_cl) {
 		dev_dbg(&dev->pdev->dev, "bad file ext.\n");
 		return -ETIMEDOUT;
@@ -249,7 +249,7 @@ int mei_amthif_read(struct mei_device *dev, struct file *file,
 	    cb->response_buffer.size);
 	dev_dbg(&dev->pdev->dev, "amthif cb->buf_idx - %lu\n", cb->buf_idx);
 
-	/* length is being turncated to PAGE_SIZE, however,
+	/* length is being truncated to PAGE_SIZE, however,
 	 * the buf_idx may point beyond */
 	length = min_t(size_t, length, (cb->buf_idx - *offset));
 
@@ -316,6 +316,7 @@ static int mei_amthif_send_cmd(struct mei_device *dev, struct mei_cl_cb *cb)
 		mei_hdr.host_addr = dev->iamthif_cl.host_client_id;
 		mei_hdr.me_addr = dev->iamthif_cl.me_client_id;
 		mei_hdr.reserved = 0;
+		mei_hdr.internal = 0;
 		dev->iamthif_msg_buf_index += mei_hdr.length;
 		ret = mei_write_message(dev, &mei_hdr, dev->iamthif_msg_buf);
 		if (ret)
@@ -477,6 +478,7 @@ int mei_amthif_irq_write_complete(struct mei_cl *cl, struct mei_cl_cb *cb,
 	mei_hdr.host_addr = cl->host_client_id;
 	mei_hdr.me_addr = cl->me_client_id;
 	mei_hdr.reserved = 0;
+	mei_hdr.internal = 0;
 
 	if (*slots >= msg_slots) {
 		mei_hdr.length = len;
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 87c96e4669e2..4f268a377720 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -154,7 +154,7 @@ int mei_io_cb_alloc_req_buf(struct mei_cl_cb *cb, size_t length)
 	return 0;
 }
 /**
- * mei_io_cb_alloc_resp_buf - allocate respose buffer
+ * mei_io_cb_alloc_resp_buf - allocate response buffer
  *
  * @cb: io callback structure
  * @length: size of the buffer
@@ -207,7 +207,7 @@ int mei_cl_flush_queues(struct mei_cl *cl)
 
 
 /**
- * mei_cl_init - initializes intialize cl.
+ * mei_cl_init - initializes cl.
  *
  * @cl: host client to be initialized
  * @dev: mei device
@@ -263,10 +263,10 @@ struct mei_cl_cb *mei_cl_find_read_cb(struct mei_cl *cl)
 	return NULL;
 }
 
-/** mei_cl_link: allocte host id in the host map
+/** mei_cl_link: allocate host id in the host map
  *
  * @cl - host client
- * @id - fixed host id or -1 for genereting one
+ * @id - fixed host id or -1 for generating one
  *
  * returns 0 on success
  *	-EINVAL on incorrect values
@@ -282,19 +282,19 @@ int mei_cl_link(struct mei_cl *cl, int id)
 
 	dev = cl->dev;
 
-	/* If Id is not asigned get one*/
+	/* If Id is not assigned get one*/
 	if (id == MEI_HOST_CLIENT_ID_ANY)
 		id = find_first_zero_bit(dev->host_clients_map,
 					MEI_CLIENTS_MAX);
 
 	if (id >= MEI_CLIENTS_MAX) {
-		dev_err(&dev->pdev->dev, "id exceded %d", MEI_CLIENTS_MAX) ;
+		dev_err(&dev->pdev->dev, "id exceeded %d", MEI_CLIENTS_MAX);
 		return -EMFILE;
 	}
 
 	open_handle_count = dev->open_handle_count + dev->iamthif_open_count;
 	if (open_handle_count >= MEI_MAX_OPEN_HANDLE_COUNT) {
-		dev_err(&dev->pdev->dev, "open_handle_count exceded %d",
+		dev_err(&dev->pdev->dev, "open_handle_count exceeded %d",
 			MEI_MAX_OPEN_HANDLE_COUNT);
 		return -EMFILE;
 	}
@@ -344,8 +344,6 @@ int mei_cl_unlink(struct mei_cl *cl)
 
 	cl->state = MEI_FILE_INITIALIZING;
 
-	list_del_init(&cl->link);
-
 	return 0;
 }
 
@@ -378,7 +376,7 @@ void mei_host_client_init(struct work_struct *work)
 
 
 /**
- * mei_cl_disconnect - disconnect host clinet form the me one
+ * mei_cl_disconnect - disconnect host client from the me one
  *
  * @cl: host client
  *
@@ -457,7 +455,7 @@ free:
  *
  * @cl: private data of the file object
  *
- * returns ture if other client is connected, 0 - otherwise.
+ * returns true if other client is connected, false - otherwise.
  */
 bool mei_cl_is_other_connecting(struct mei_cl *cl)
 {
@@ -481,7 +479,7 @@ bool mei_cl_is_other_connecting(struct mei_cl *cl)
 }
 
 /**
- * mei_cl_connect - connect host clinet to the me one
+ * mei_cl_connect - connect host client to the me one
  *
  * @cl: host client
  *
@@ -729,6 +727,7 @@ int mei_cl_irq_write_complete(struct mei_cl *cl, struct mei_cl_cb *cb,
 	mei_hdr.host_addr = cl->host_client_id;
 	mei_hdr.me_addr = cl->me_client_id;
 	mei_hdr.reserved = 0;
+	mei_hdr.internal = cb->internal;
 
 	if (*slots >= msg_slots) {
 		mei_hdr.length = len;
@@ -775,7 +774,7 @@ int mei_cl_irq_write_complete(struct mei_cl *cl, struct mei_cl_cb *cb,
  * @cl: host client
  * @cl: write callback with filled data
  *
- * returns numbe of bytes sent on success, <0 on failure.
+ * returns number of bytes sent on success, <0 on failure.
  */
 int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
 {
@@ -828,6 +827,7 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
 	mei_hdr.host_addr = cl->host_client_id;
 	mei_hdr.me_addr = cl->me_client_id;
 	mei_hdr.reserved = 0;
+	mei_hdr.internal = cb->internal;
 
 
 	rets = mei_write_message(dev, &mei_hdr, buf->data);
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index e3870f22d238..a3ae154444b2 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -43,7 +43,7 @@ static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf,
 
 	mutex_lock(&dev->device_lock);
 
-	/*  if the driver is not enabled the list won't b consitent */
+	/*  if the driver is not enabled the list won't be consistent */
 	if (dev->dev_state != MEI_DEV_ENABLED)
 		goto out;
 
@@ -101,7 +101,7 @@ static const struct file_operations mei_dbgfs_fops_devstate = {
 
 /**
  * mei_dbgfs_deregister - Remove the debugfs files and directories
- * @mei - pointer to mei device private dat
+ * @mei - pointer to mei device private data
  */
 void mei_dbgfs_deregister(struct mei_device *dev)
 {
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 9b3a0fb7f265..8520cdfa2f05 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -28,9 +28,9 @@
  *
  * @dev: the device structure
  *
- * returns none.
+ * returns 0 on success -ENOMEM on allocation failure
  */
-static void mei_hbm_me_cl_allocate(struct mei_device *dev)
+static int mei_hbm_me_cl_allocate(struct mei_device *dev)
 {
 	struct mei_me_client *clients;
 	int b;
@@ -44,7 +44,7 @@ static void mei_hbm_me_cl_allocate(struct mei_device *dev)
 		dev->me_clients_num++;
 
 	if (dev->me_clients_num == 0)
-		return;
+		return 0;
 
 	kfree(dev->me_clients);
 	dev->me_clients = NULL;
@@ -56,12 +56,10 @@ static void mei_hbm_me_cl_allocate(struct mei_device *dev)
 			sizeof(struct mei_me_client), GFP_KERNEL);
 	if (!clients) {
 		dev_err(&dev->pdev->dev, "memory allocation for ME clients failed.\n");
-		dev->dev_state = MEI_DEV_RESETTING;
-		mei_reset(dev, 1);
-		return;
+		return -ENOMEM;
 	}
 	dev->me_clients = clients;
-	return;
+	return 0;
 }
 
 /**
@@ -85,12 +83,12 @@ void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len)
 }
 
 /**
- * same_disconn_addr - tells if they have the same address
+ * mei_hbm_cl_addr_equal - tells if they have the same address
  *
- * @file: private data of the file object.
- * @disconn: disconnection request.
+ * @cl: - client
+ * @buf: buffer with cl header
  *
- * returns true if addres are same
+ * returns true if addresses are the same
  */
 static inline
 bool mei_hbm_cl_addr_equal(struct mei_cl *cl, void *buf)
@@ -128,6 +126,17 @@ static bool is_treat_specially_client(struct mei_cl *cl,
 	return false;
 }
 
+/**
+ * mei_hbm_idle - set hbm to idle state
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_idle(struct mei_device *dev)
+{
+	dev->init_clients_timer = 0;
+	dev->hbm_state = MEI_HBM_IDLE;
+}
+
 int mei_hbm_start_wait(struct mei_device *dev)
 {
 	int ret;
@@ -137,7 +146,7 @@ int mei_hbm_start_wait(struct mei_device *dev)
 	mutex_unlock(&dev->device_lock);
 	ret = wait_event_interruptible_timeout(dev->wait_recvd_msg,
 			dev->hbm_state == MEI_HBM_IDLE ||
-			dev->hbm_state > MEI_HBM_START,
+			dev->hbm_state >= MEI_HBM_STARTED,
 			mei_secs_to_jiffies(MEI_INTEROP_TIMEOUT));
 	mutex_lock(&dev->device_lock);
 
@@ -153,12 +162,15 @@ int mei_hbm_start_wait(struct mei_device *dev)
  * mei_hbm_start_req - sends start request message.
  *
  * @dev: the device structure
+ *
+ * returns 0 on success and < 0 on failure
  */
 int mei_hbm_start_req(struct mei_device *dev)
 {
 	struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
 	struct hbm_host_version_request *start_req;
 	const size_t len = sizeof(struct hbm_host_version_request);
+	int ret;
 
 	mei_hbm_hdr(mei_hdr, len);
 
@@ -170,12 +182,13 @@ int mei_hbm_start_req(struct mei_device *dev)
 	start_req->host_version.minor_version = HBM_MINOR_VERSION;
 
 	dev->hbm_state = MEI_HBM_IDLE;
-	if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) {
-		dev_err(&dev->pdev->dev, "version message write failed\n");
-		dev->dev_state = MEI_DEV_RESETTING;
-		mei_reset(dev, 1);
-		return -EIO;
+	ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+	if (ret) {
+		dev_err(&dev->pdev->dev, "version message write failed: ret = %d\n",
+			ret);
+		return ret;
 	}
+
 	dev->hbm_state = MEI_HBM_START;
 	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
 	return 0;
@@ -186,13 +199,15 @@ int mei_hbm_start_req(struct mei_device *dev)
  *
  * @dev: the device structure
  *
- * returns none.
+ * returns 0 on success and < 0 on failure
  */
-static void mei_hbm_enum_clients_req(struct mei_device *dev)
+static int mei_hbm_enum_clients_req(struct mei_device *dev)
 {
 	struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
 	struct hbm_host_enum_request *enum_req;
 	const size_t len = sizeof(struct hbm_host_enum_request);
+	int ret;
+
 	/* enumerate clients */
 	mei_hbm_hdr(mei_hdr, len);
 
@@ -200,14 +215,15 @@ static void mei_hbm_enum_clients_req(struct mei_device *dev)
 	memset(enum_req, 0, len);
 	enum_req->hbm_cmd = HOST_ENUM_REQ_CMD;
 
-	if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) {
-		dev->dev_state = MEI_DEV_RESETTING;
-		dev_err(&dev->pdev->dev, "enumeration request write failed.\n");
-		mei_reset(dev, 1);
+	ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+	if (ret) {
+		dev_err(&dev->pdev->dev, "enumeration request write failed: ret = %d.\n",
+			ret);
+		return ret;
 	}
 	dev->hbm_state = MEI_HBM_ENUM_CLIENTS;
 	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
-	return;
+	return 0;
 }
 
 /**
@@ -215,7 +231,7 @@ static void mei_hbm_enum_clients_req(struct mei_device *dev)
  *
  * @dev: the device structure
  *
- * returns none.
+ * returns 0 on success and < 0 on failure
  */
 
 static int mei_hbm_prop_req(struct mei_device *dev)
@@ -226,7 +242,7 @@ static int mei_hbm_prop_req(struct mei_device *dev)
 	const size_t len = sizeof(struct hbm_props_request);
 	unsigned long next_client_index;
 	unsigned long client_num;
-
+	int ret;
 
 	client_num = dev->me_client_presentation_num;
 
@@ -253,12 +269,11 @@ static int mei_hbm_prop_req(struct mei_device *dev)
 	prop_req->hbm_cmd = HOST_CLIENT_PROPERTIES_REQ_CMD;
 	prop_req->address = next_client_index;
 
-	if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) {
-		dev->dev_state = MEI_DEV_RESETTING;
-		dev_err(&dev->pdev->dev, "properties request write failed\n");
-		mei_reset(dev, 1);
-
-		return -EIO;
+	ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+	if (ret) {
+		dev_err(&dev->pdev->dev, "properties request write failed: ret = %d\n",
+			ret);
+		return ret;
 	}
 
 	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
@@ -268,7 +283,7 @@ static int mei_hbm_prop_req(struct mei_device *dev)
 }
 
 /**
- * mei_hbm_stop_req_prepare - perpare stop request message
+ * mei_hbm_stop_req_prepare - prepare stop request message
  *
  * @dev - mei device
  * @mei_hdr - mei message header
@@ -289,7 +304,7 @@ static void mei_hbm_stop_req_prepare(struct mei_device *dev,
 }
 
 /**
- * mei_hbm_cl_flow_control_req - sends flow control requst.
+ * mei_hbm_cl_flow_control_req - sends flow control request.
  *
  * @dev: the device structure
  * @cl: client info
@@ -451,7 +466,7 @@ int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl)
 }
 
 /**
- * mei_hbm_cl_connect_res - connect resposne from the ME
+ * mei_hbm_cl_connect_res - connect response from the ME
  *
  * @dev: the device structure
  * @rs: connect response bus message
@@ -505,8 +520,8 @@ static void mei_hbm_cl_connect_res(struct mei_device *dev,
 
 
 /**
- * mei_hbm_fw_disconnect_req - disconnect request initiated by me
- *  host sends disoconnect response
+ * mei_hbm_fw_disconnect_req - disconnect request initiated by ME firmware
+ *  host sends disconnect response
  *
  * @dev: the device structure.
  * @disconnect_req: disconnect request bus message from the me
@@ -559,8 +574,10 @@ bool mei_hbm_version_is_supported(struct mei_device *dev)
  *
  * @dev: the device structure
  * @mei_hdr: header of bus message
+ *
+ * returns 0 on success and < 0 on failure
  */
-void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
+int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 {
 	struct mei_bus_message *mei_msg;
 	struct mei_me_client *me_client;
@@ -577,8 +594,20 @@ void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 	mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
 	mei_msg = (struct mei_bus_message *)dev->rd_msg_buf;
 
+	/* ignore spurious message and prevent reset nesting
+	 * hbm is put to idle during system reset
+	 */
+	if (dev->hbm_state == MEI_HBM_IDLE) {
+		dev_dbg(&dev->pdev->dev, "hbm: state is idle ignore spurious messages\n");
+		return 0;
+	}
+
 	switch (mei_msg->hbm_cmd) {
 	case HOST_START_RES_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: start: response message received.\n");
+
+		dev->init_clients_timer = 0;
+
 		version_res = (struct hbm_host_version_response *)mei_msg;
 
 		dev_dbg(&dev->pdev->dev, "HBM VERSION: DRIVER=%02d:%02d DEVICE=%02d:%02d\n",
@@ -597,73 +626,89 @@ void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 		}
 
 		if (!mei_hbm_version_is_supported(dev)) {
-			dev_warn(&dev->pdev->dev, "hbm version mismatch: stopping the driver.\n");
+			dev_warn(&dev->pdev->dev, "hbm: start: version mismatch - stopping the driver.\n");
 
-			dev->hbm_state = MEI_HBM_STOP;
+			dev->hbm_state = MEI_HBM_STOPPED;
 			mei_hbm_stop_req_prepare(dev, &dev->wr_msg.hdr,
 						dev->wr_msg.data);
-			mei_write_message(dev, &dev->wr_msg.hdr,
-					dev->wr_msg.data);
+			if (mei_write_message(dev, &dev->wr_msg.hdr,
+					dev->wr_msg.data)) {
+				dev_err(&dev->pdev->dev, "hbm: start: failed to send stop request\n");
+				return -EIO;
+			}
+			break;
+		}
 
-			return;
+		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+		    dev->hbm_state != MEI_HBM_START) {
+			dev_err(&dev->pdev->dev, "hbm: start: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
 		}
 
-		if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
-		    dev->hbm_state == MEI_HBM_START) {
-			dev->init_clients_timer = 0;
-			mei_hbm_enum_clients_req(dev);
-		} else {
-			dev_err(&dev->pdev->dev, "reset: wrong host start response\n");
-			mei_reset(dev, 1);
-			return;
+		dev->hbm_state = MEI_HBM_STARTED;
+
+		if (mei_hbm_enum_clients_req(dev)) {
+			dev_err(&dev->pdev->dev, "hbm: start: failed to send enumeration request\n");
+			return -EIO;
 		}
 
 		wake_up_interruptible(&dev->wait_recvd_msg);
-		dev_dbg(&dev->pdev->dev, "host start response message received.\n");
 		break;
 
 	case CLIENT_CONNECT_RES_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: client connect response: message received.\n");
+
 		connect_res = (struct hbm_client_connect_response *) mei_msg;
 		mei_hbm_cl_connect_res(dev, connect_res);
-		dev_dbg(&dev->pdev->dev, "client connect response message received.\n");
 		wake_up(&dev->wait_recvd_msg);
 		break;
 
 	case CLIENT_DISCONNECT_RES_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: client disconnect response: message received.\n");
+
 		disconnect_res = (struct hbm_client_connect_response *) mei_msg;
 		mei_hbm_cl_disconnect_res(dev, disconnect_res);
-		dev_dbg(&dev->pdev->dev, "client disconnect response message received.\n");
 		wake_up(&dev->wait_recvd_msg);
 		break;
 
 	case MEI_FLOW_CONTROL_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: client flow control response: message received.\n");
+
 		flow_control = (struct hbm_flow_control *) mei_msg;
 		mei_hbm_cl_flow_control_res(dev, flow_control);
-		dev_dbg(&dev->pdev->dev, "client flow control response message received.\n");
 		break;
 
 	case HOST_CLIENT_PROPERTIES_RES_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: properties response: message received.\n");
+
+		dev->init_clients_timer = 0;
+
+		if (dev->me_clients == NULL) {
+			dev_err(&dev->pdev->dev, "hbm: properties response: mei_clients not allocated\n");
+			return -EPROTO;
+		}
+
 		props_res = (struct hbm_props_response *)mei_msg;
 		me_client = &dev->me_clients[dev->me_client_presentation_num];
 
-		if (props_res->status || !dev->me_clients) {
-			dev_err(&dev->pdev->dev, "reset: properties response hbm wrong status.\n");
-			mei_reset(dev, 1);
-			return;
+		if (props_res->status) {
+			dev_err(&dev->pdev->dev, "hbm: properties response: wrong status = %d\n",
+				props_res->status);
+			return -EPROTO;
 		}
 
 		if (me_client->client_id != props_res->address) {
-			dev_err(&dev->pdev->dev, "reset: host properties response address mismatch\n");
-			mei_reset(dev, 1);
-			return;
+			dev_err(&dev->pdev->dev, "hbm: properties response: address mismatch %d ?= %d\n",
+				me_client->client_id, props_res->address);
+			return -EPROTO;
 		}
 
 		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
 		    dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) {
-			dev_err(&dev->pdev->dev, "reset: unexpected properties response\n");
-			mei_reset(dev, 1);
-
-			return;
+			dev_err(&dev->pdev->dev, "hbm: properties response: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
 		}
 
 		me_client->props = props_res->client_properties;
@@ -671,49 +716,70 @@ void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 		dev->me_client_presentation_num++;
 
 		/* request property for the next client */
-		mei_hbm_prop_req(dev);
+		if (mei_hbm_prop_req(dev))
+			return -EIO;
 
 		break;
 
 	case HOST_ENUM_RES_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: enumeration response: message received\n");
+
+		dev->init_clients_timer = 0;
+
 		enum_res = (struct hbm_host_enum_response *) mei_msg;
 		BUILD_BUG_ON(sizeof(dev->me_clients_map)
 				< sizeof(enum_res->valid_addresses));
 		memcpy(dev->me_clients_map, enum_res->valid_addresses,
 			sizeof(enum_res->valid_addresses));
-		if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
-		    dev->hbm_state == MEI_HBM_ENUM_CLIENTS) {
-				dev->init_clients_timer = 0;
-				mei_hbm_me_cl_allocate(dev);
-				dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES;
-
-				/* first property reqeust */
-				mei_hbm_prop_req(dev);
-		} else {
-			dev_err(&dev->pdev->dev, "reset: unexpected enumeration response hbm.\n");
-			mei_reset(dev, 1);
-			return;
+
+		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+		    dev->hbm_state != MEI_HBM_ENUM_CLIENTS) {
+			dev_err(&dev->pdev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
+
+		if (mei_hbm_me_cl_allocate(dev)) {
+			dev_err(&dev->pdev->dev, "hbm: enumeration response: cannot allocate clients array\n");
+			return -ENOMEM;
 		}
+
+		dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES;
+
+		/* first property request */
+		if (mei_hbm_prop_req(dev))
+			return -EIO;
+
 		break;
 
 	case HOST_STOP_RES_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: stop response: message received\n");
+
+		dev->init_clients_timer = 0;
+
+		if (dev->hbm_state != MEI_HBM_STOPPED) {
+			dev_err(&dev->pdev->dev, "hbm: stop response: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
 
-		if (dev->hbm_state != MEI_HBM_STOP)
-			dev_err(&dev->pdev->dev, "unexpected stop response hbm.\n");
 		dev->dev_state = MEI_DEV_DISABLED;
-		dev_info(&dev->pdev->dev, "reset: FW stop response.\n");
-		mei_reset(dev, 1);
+		dev_info(&dev->pdev->dev, "hbm: stop response: resetting.\n");
+		/* force the reset */
+		return -EPROTO;
 		break;
 
 	case CLIENT_DISCONNECT_REQ_CMD:
-		/* search for client */
+		dev_dbg(&dev->pdev->dev, "hbm: disconnect request: message received\n");
+
 		disconnect_req = (struct hbm_client_connect_request *)mei_msg;
 		mei_hbm_fw_disconnect_req(dev, disconnect_req);
 		break;
 
 	case ME_STOP_REQ_CMD:
+		dev_dbg(&dev->pdev->dev, "hbm: stop request: message received\n");
 
-		dev->hbm_state = MEI_HBM_STOP;
+		dev->hbm_state = MEI_HBM_STOPPED;
 		mei_hbm_stop_req_prepare(dev, &dev->wr_ext_msg.hdr,
 					dev->wr_ext_msg.data);
 		break;
@@ -722,5 +788,6 @@ void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 		break;
 
 	}
+	return 0;
 }
 
diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h
index 4ae2e56e404f..5f92188a5cd7 100644
--- a/drivers/misc/mei/hbm.h
+++ b/drivers/misc/mei/hbm.h
@@ -32,13 +32,13 @@ struct mei_cl;
 enum mei_hbm_state {
 	MEI_HBM_IDLE = 0,
 	MEI_HBM_START,
+	MEI_HBM_STARTED,
 	MEI_HBM_ENUM_CLIENTS,
 	MEI_HBM_CLIENT_PROPERTIES,
-	MEI_HBM_STARTED,
-	MEI_HBM_STOP,
+	MEI_HBM_STOPPED,
 };
 
-void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr);
+int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr);
 
 static inline void mei_hbm_hdr(struct mei_msg_hdr *hdr, size_t length)
 {
@@ -49,6 +49,7 @@ static inline void mei_hbm_hdr(struct mei_msg_hdr *hdr, size_t length)
 	hdr->reserved = 0;
 }
 
+void mei_hbm_idle(struct mei_device *dev);
 int mei_hbm_start_req(struct mei_device *dev);
 int mei_hbm_start_wait(struct mei_device *dev);
 int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl);
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index 3412adcdaeb0..6c07623704c2 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -469,7 +469,7 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
 	struct mei_device *dev = (struct mei_device *) dev_id;
 	struct mei_cl_cb complete_list;
 	s32 slots;
-	int rets;
+	int rets = 0;
 
 	dev_dbg(&dev->pdev->dev, "function called after ISR to handle the interrupt processing.\n");
 	/* initialize our complete list */
@@ -487,10 +487,9 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
 	    dev->dev_state != MEI_DEV_INITIALIZING &&
 	    dev->dev_state != MEI_DEV_POWER_DOWN &&
 	    dev->dev_state != MEI_DEV_POWER_UP) {
-		dev_dbg(&dev->pdev->dev, "FW not ready.\n");
-		mei_reset(dev, 1);
-		mutex_unlock(&dev->device_lock);
-		return IRQ_HANDLED;
+		dev_warn(&dev->pdev->dev, "FW not ready: resetting.\n");
+		schedule_work(&dev->reset_work);
+		goto end;
 	}
 
 	/*  check if we need to start the dev */
@@ -500,15 +499,12 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
 
 			dev->recvd_hw_ready = true;
 			wake_up_interruptible(&dev->wait_hw_ready);
-
-			mutex_unlock(&dev->device_lock);
-			return IRQ_HANDLED;
 		} else {
+
 			dev_dbg(&dev->pdev->dev, "Reset Completed.\n");
 			mei_me_hw_reset_release(dev);
-			mutex_unlock(&dev->device_lock);
-			return IRQ_HANDLED;
 		}
+		goto end;
 	}
 	/* check slots available for reading */
 	slots = mei_count_full_read_slots(dev);
@@ -516,21 +512,23 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
 		/* we have urgent data to send so break the read */
 		if (dev->wr_ext_msg.hdr.length)
 			break;
-		dev_dbg(&dev->pdev->dev, "slots =%08x\n", slots);
-		dev_dbg(&dev->pdev->dev, "call mei_irq_read_handler.\n");
+		dev_dbg(&dev->pdev->dev, "slots to read = %08x\n", slots);
 		rets = mei_irq_read_handler(dev, &complete_list, &slots);
-		if (rets)
+		if (rets) {
+			schedule_work(&dev->reset_work);
 			goto end;
+		}
 	}
+
 	rets = mei_irq_write_handler(dev, &complete_list);
-end:
-	dev_dbg(&dev->pdev->dev, "end of bottom half function.\n");
-	dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
 
-	mutex_unlock(&dev->device_lock);
+	dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
 
 	mei_irq_compl_handler(dev, &complete_list);
 
+end:
+	dev_dbg(&dev->pdev->dev, "interrupt thread end ret = %d\n", rets);
+	mutex_unlock(&dev->device_lock);
 	return IRQ_HANDLED;
 }
 static const struct mei_hw_ops mei_me_hw_ops = {
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
index cb2f556b4252..dd44e33ad2b6 100644
--- a/drivers/misc/mei/hw.h
+++ b/drivers/misc/mei/hw.h
@@ -111,7 +111,8 @@ struct mei_msg_hdr {
 	u32 me_addr:8;
 	u32 host_addr:8;
 	u32 length:9;
-	u32 reserved:6;
+	u32 reserved:5;
+	u32 internal:1;
 	u32 msg_complete:1;
 } __packed;
 
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index f7f3abbe12b6..c47fa273879e 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -43,42 +43,6 @@ const char *mei_dev_state_str(int state)
 #undef MEI_DEV_STATE
 }
 
-void mei_device_init(struct mei_device *dev)
-{
-	/* setup our list array */
-	INIT_LIST_HEAD(&dev->file_list);
-	INIT_LIST_HEAD(&dev->device_list);
-	mutex_init(&dev->device_lock);
-	init_waitqueue_head(&dev->wait_hw_ready);
-	init_waitqueue_head(&dev->wait_recvd_msg);
-	init_waitqueue_head(&dev->wait_stop_wd);
-	dev->dev_state = MEI_DEV_INITIALIZING;
-
-	mei_io_list_init(&dev->read_list);
-	mei_io_list_init(&dev->write_list);
-	mei_io_list_init(&dev->write_waiting_list);
-	mei_io_list_init(&dev->ctrl_wr_list);
-	mei_io_list_init(&dev->ctrl_rd_list);
-
-	INIT_DELAYED_WORK(&dev->timer_work, mei_timer);
-	INIT_WORK(&dev->init_work, mei_host_client_init);
-
-	INIT_LIST_HEAD(&dev->wd_cl.link);
-	INIT_LIST_HEAD(&dev->iamthif_cl.link);
-	mei_io_list_init(&dev->amthif_cmd_list);
-	mei_io_list_init(&dev->amthif_rd_complete_list);
-
-	bitmap_zero(dev->host_clients_map, MEI_CLIENTS_MAX);
-	dev->open_handle_count = 0;
-
-	/*
-	 * Reserving the first client ID
-	 * 0: Reserved for MEI Bus Message communications
-	 */
-	bitmap_set(dev->host_clients_map, 0, 1);
-}
-EXPORT_SYMBOL_GPL(mei_device_init);
-
 /**
  * mei_start - initializes host and fw to start work.
  *
@@ -132,6 +96,20 @@ err:
 EXPORT_SYMBOL_GPL(mei_start);
 
 /**
+ * mei_cancel_work - cancel mei background jobs
+ *
+ * @dev: the device structure
+ */
+void mei_cancel_work(struct mei_device *dev)
+{
+	cancel_work_sync(&dev->init_work);
+	cancel_work_sync(&dev->reset_work);
+	/* use _sync so a timer callback that is already running has finished */
+	cancel_delayed_work_sync(&dev->timer_work);
+}
+EXPORT_SYMBOL_GPL(mei_cancel_work);
+
+/**
  * mei_reset - resets host and fw.
  *
  * @dev: the device structure
@@ -151,14 +129,19 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
 		dev_warn(&dev->pdev->dev, "unexpected reset: dev_state = %s\n",
 			 mei_dev_state_str(dev->dev_state));
 
+	/* we're already in reset, cancel the init timer;
+	 * if the reset was called due to an hbm protocol error
+	 * we need to do this before hw start
+	 * so the hbm watchdog won't kick in
+	 */
+	mei_hbm_idle(dev);
+
 	ret = mei_hw_reset(dev, interrupts_enabled);
 	if (ret) {
 		dev_err(&dev->pdev->dev, "hw reset failed disabling the device\n");
 		interrupts_enabled = false;
-		dev->dev_state = MEI_DEV_DISABLED;
 	}
 
-	dev->hbm_state = MEI_HBM_IDLE;
 
 	if (dev->dev_state != MEI_DEV_INITIALIZING &&
 	    dev->dev_state != MEI_DEV_POWER_UP) {
@@ -182,8 +165,6 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
 		memset(&dev->wr_ext_msg, 0, sizeof(dev->wr_ext_msg));
 	}
 
-	/* we're already in reset, cancel the init timer */
-	dev->init_clients_timer = 0;
 
 	dev->me_clients_num = 0;
 	dev->rd_msg_hdr = 0;
@@ -191,6 +172,7 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
 
 	if (!interrupts_enabled) {
 		dev_dbg(&dev->pdev->dev, "intr not enabled end of reset\n");
+		dev->dev_state = MEI_DEV_DISABLED;
 		return;
 	}
 
@@ -206,25 +188,39 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
 
 	dev->dev_state = MEI_DEV_INIT_CLIENTS;
 
-	mei_hbm_start_req(dev);
-
+	ret = mei_hbm_start_req(dev);
+	if (ret) {
+		dev_err(&dev->pdev->dev, "hbm_start failed disabling the device\n");
+		dev->dev_state = MEI_DEV_DISABLED;
+		return;
+	}
 }
 EXPORT_SYMBOL_GPL(mei_reset);
 
+static void mei_reset_work(struct work_struct *work)
+{
+	struct mei_device *dev =
+		container_of(work, struct mei_device,  reset_work);
+
+	mutex_lock(&dev->device_lock);
+
+	mei_reset(dev, true);
+
+	mutex_unlock(&dev->device_lock);
+}
+
 void mei_stop(struct mei_device *dev)
 {
 	dev_dbg(&dev->pdev->dev, "stopping the device.\n");
 
-	flush_scheduled_work();
+	mei_cancel_work(dev);
 
-	mutex_lock(&dev->device_lock);
+	mei_nfc_host_exit(dev);
 
-	cancel_delayed_work(&dev->timer_work);
+	mutex_lock(&dev->device_lock);
 
 	mei_wd_stop(dev);
 
-	mei_nfc_host_exit();
-
 	dev->dev_state = MEI_DEV_POWER_DOWN;
 	mei_reset(dev, 0);
 
@@ -236,3 +232,40 @@ EXPORT_SYMBOL_GPL(mei_stop);
 
 
 
+void mei_device_init(struct mei_device *dev)
+{
+	/* setup our list array */
+	INIT_LIST_HEAD(&dev->file_list);
+	INIT_LIST_HEAD(&dev->device_list);
+	mutex_init(&dev->device_lock);
+	init_waitqueue_head(&dev->wait_hw_ready);
+	init_waitqueue_head(&dev->wait_recvd_msg);
+	init_waitqueue_head(&dev->wait_stop_wd);
+	dev->dev_state = MEI_DEV_INITIALIZING;
+
+	mei_io_list_init(&dev->read_list);
+	mei_io_list_init(&dev->write_list);
+	mei_io_list_init(&dev->write_waiting_list);
+	mei_io_list_init(&dev->ctrl_wr_list);
+	mei_io_list_init(&dev->ctrl_rd_list);
+
+	INIT_DELAYED_WORK(&dev->timer_work, mei_timer);
+	INIT_WORK(&dev->init_work, mei_host_client_init);
+	INIT_WORK(&dev->reset_work, mei_reset_work);
+
+	INIT_LIST_HEAD(&dev->wd_cl.link);
+	INIT_LIST_HEAD(&dev->iamthif_cl.link);
+	mei_io_list_init(&dev->amthif_cmd_list);
+	mei_io_list_init(&dev->amthif_rd_complete_list);
+
+	bitmap_zero(dev->host_clients_map, MEI_CLIENTS_MAX);
+	dev->open_handle_count = 0;
+
+	/*
+	 * Reserving the first client ID
+	 * 0: Reserved for MEI Bus Message communications
+	 */
+	bitmap_set(dev->host_clients_map, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mei_device_init);
+
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index 7a95c07e59a6..2a7277de7ca1 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -31,7 +31,7 @@
 
 
 /**
- * mei_irq_compl_handler - dispatch complete handelers
+ * mei_irq_compl_handler - dispatch complete handlers
  *	for the completed callbacks
  *
  * @dev - mei device
@@ -301,13 +301,11 @@ int mei_irq_read_handler(struct mei_device *dev,
 		struct mei_cl_cb *cmpl_list, s32 *slots)
 {
 	struct mei_msg_hdr *mei_hdr;
-	struct mei_cl *cl_pos = NULL;
-	struct mei_cl *cl_next = NULL;
-	int ret = 0;
+	struct mei_cl *cl;
+	int ret;
 
 	if (!dev->rd_msg_hdr) {
 		dev->rd_msg_hdr = mei_read_hdr(dev);
-		dev_dbg(&dev->pdev->dev, "slots =%08x.\n", *slots);
 		(*slots)--;
 		dev_dbg(&dev->pdev->dev, "slots =%08x.\n", *slots);
 	}
@@ -315,61 +313,67 @@ int mei_irq_read_handler(struct mei_device *dev,
 	dev_dbg(&dev->pdev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr));
 
 	if (mei_hdr->reserved || !dev->rd_msg_hdr) {
-		dev_dbg(&dev->pdev->dev, "corrupted message header.\n");
+		dev_err(&dev->pdev->dev, "corrupted message header 0x%08X\n",
+				dev->rd_msg_hdr);
 		ret = -EBADMSG;
 		goto end;
 	}
 
-	if (mei_hdr->host_addr || mei_hdr->me_addr) {
-		list_for_each_entry_safe(cl_pos, cl_next,
-					&dev->file_list, link) {
-			dev_dbg(&dev->pdev->dev,
-					"list_for_each_entry_safe read host"
-					" client = %d, ME client = %d\n",
-					cl_pos->host_client_id,
-					cl_pos->me_client_id);
-			if (mei_cl_hbm_equal(cl_pos, mei_hdr))
-				break;
-		}
-
-		if (&cl_pos->link == &dev->file_list) {
-			dev_dbg(&dev->pdev->dev, "corrupted message header\n");
-			ret = -EBADMSG;
-			goto end;
-		}
-	}
-	if (((*slots) * sizeof(u32)) < mei_hdr->length) {
-		dev_err(&dev->pdev->dev,
-				"we can't read the message slots =%08x.\n",
+	if (mei_slots2data(*slots) < mei_hdr->length) {
+		dev_err(&dev->pdev->dev, "less data available than hdr length, slots=%08x.\n",
 				*slots);
 		/* we can't read the message */
 		ret = -ERANGE;
 		goto end;
 	}
 
-	/* decide where to read the message too */
-	if (!mei_hdr->host_addr) {
-		dev_dbg(&dev->pdev->dev, "call mei_hbm_dispatch.\n");
-		mei_hbm_dispatch(dev, mei_hdr);
-		dev_dbg(&dev->pdev->dev, "end mei_hbm_dispatch.\n");
-	} else if (mei_hdr->host_addr == dev->iamthif_cl.host_client_id &&
-		   (MEI_FILE_CONNECTED == dev->iamthif_cl.state) &&
-		   (dev->iamthif_state == MEI_IAMTHIF_READING)) {
+	/*  HBM message */
+	if (mei_hdr->host_addr == 0 && mei_hdr->me_addr == 0) {
+		ret = mei_hbm_dispatch(dev, mei_hdr);
+		if (ret) {
+			dev_dbg(&dev->pdev->dev, "mei_hbm_dispatch failed ret = %d\n",
+					ret);
+			goto end;
+		}
+		goto reset_slots;
+	}
 
-		dev_dbg(&dev->pdev->dev, "call mei_amthif_irq_read_msg.\n");
-		dev_dbg(&dev->pdev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr));
+	/* find recipient cl */
+	list_for_each_entry(cl, &dev->file_list, link) {
+		if (mei_cl_hbm_equal(cl, mei_hdr)) {
+			cl_dbg(dev, cl, "got a message\n");
+			break;
+		}
+	}
+
+	/* if no recipient cl was found we assume corrupted header */
+	if (&cl->link == &dev->file_list) {
+		dev_err(&dev->pdev->dev, "no destination client found 0x%08X\n",
+				dev->rd_msg_hdr);
+		ret = -EBADMSG;
+		goto end;
+	}
+
+	if (mei_hdr->host_addr == dev->iamthif_cl.host_client_id &&
+	    MEI_FILE_CONNECTED == dev->iamthif_cl.state &&
+	    dev->iamthif_state == MEI_IAMTHIF_READING) {
 
 		ret = mei_amthif_irq_read_msg(dev, mei_hdr, cmpl_list);
-		if (ret)
+		if (ret) {
+			dev_err(&dev->pdev->dev, "mei_amthif_irq_read_msg failed = %d\n",
+					ret);
 			goto end;
+		}
 	} else {
-		dev_dbg(&dev->pdev->dev, "call mei_cl_irq_read_msg.\n");
-		dev_dbg(&dev->pdev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr));
 		ret = mei_cl_irq_read_msg(dev, mei_hdr, cmpl_list);
-		if (ret)
+		if (ret) {
+			dev_err(&dev->pdev->dev, "mei_cl_irq_read_msg failed = %d\n",
+					ret);
 			goto end;
+		}
 	}
 
+reset_slots:
 	/* reset the number of slots and header */
 	*slots = mei_count_full_read_slots(dev);
 	dev->rd_msg_hdr = 0;
@@ -533,7 +537,6 @@ EXPORT_SYMBOL_GPL(mei_irq_write_handler);
  *
  * @work: pointer to the work_struct structure
  *
- * NOTE: This function is called by timer interrupt work
  */
 void mei_timer(struct work_struct *work)
 {
@@ -548,18 +551,24 @@ void mei_timer(struct work_struct *work)
 
 
 	mutex_lock(&dev->device_lock);
-	if (dev->dev_state != MEI_DEV_ENABLED) {
-		if (dev->dev_state == MEI_DEV_INIT_CLIENTS) {
-			if (dev->init_clients_timer) {
-				if (--dev->init_clients_timer == 0) {
-					dev_err(&dev->pdev->dev, "reset: init clients timeout hbm_state = %d.\n",
-						dev->hbm_state);
-					mei_reset(dev, 1);
-				}
+
+	/* Catch interrupt stalls during HBM init handshake */
+	if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
+	    dev->hbm_state != MEI_HBM_IDLE) {
+
+		if (dev->init_clients_timer) {
+			if (--dev->init_clients_timer == 0) {
+				dev_err(&dev->pdev->dev, "timer: init clients timeout hbm_state = %d.\n",
+					dev->hbm_state);
+				mei_reset(dev, 1);
+				goto out;
 			}
 		}
-		goto out;
 	}
+
+	if (dev->dev_state != MEI_DEV_ENABLED)
+		goto out;
+
 	/*** connect/disconnect timeouts ***/
 	list_for_each_entry_safe(cl_pos, cl_next, &dev->file_list, link) {
 		if (cl_pos->timer_count) {
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 9661a812f550..5424f8ff3f7f 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -48,7 +48,7 @@
  *
  * @inode: pointer to inode structure
  * @file: pointer to file structure
- e
+ *
  * returns 0 on success, <0 on error
  */
 static int mei_open(struct inode *inode, struct file *file)
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 406f68e05b4e..2c7692807a4a 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -65,7 +65,7 @@ extern const uuid_le mei_wd_guid;
  * that can be opened to the driver.
  *
  * Limit to 255: 256 Total Clients
- * minus internal client for MEI Bus Messags
+ * minus internal client for MEI Bus Messages
  */
 #define  MEI_MAX_OPEN_HANDLE_COUNT (MEI_CLIENTS_MAX - 1)
 
@@ -178,9 +178,10 @@ struct mei_cl_cb {
 	unsigned long buf_idx;
 	unsigned long read_time;
 	struct file *file_object;
+	u32 internal:1;
 };
 
-/* MEI client instance carried as file->pirvate_data*/
+/* MEI client instance carried as file->private_data*/
 struct mei_cl {
 	struct list_head link;
 	struct mei_device *dev;
@@ -427,6 +428,7 @@ struct mei_device {
 	bool iamthif_canceled;
 
 	struct work_struct init_work;
+	struct work_struct reset_work;
 
 	/* List of bus devices */
 	struct list_head device_list;
@@ -456,6 +458,16 @@ static inline u32 mei_data2slots(size_t length)
 	return DIV_ROUND_UP(sizeof(struct mei_msg_hdr) + length, 4);
 }
 
+/**
+ * mei_slots2data - convert a slot count to the number of bytes it holds
+ * @slots: number of available slots
+ * returns the number of bytes in @slots (4 bytes per slot)
+ */
+static inline u32 mei_slots2data(int slots)
+{
+	return slots * 4;
+}
+
 /*
  * mei init function prototypes
  */
@@ -463,6 +475,7 @@ void mei_device_init(struct mei_device *dev);
 void mei_reset(struct mei_device *dev, int interrupts);
 int mei_start(struct mei_device *dev);
 void mei_stop(struct mei_device *dev);
+void mei_cancel_work(struct mei_device *dev);
 
 /*
  *  MEI interrupt functions prototype
@@ -510,7 +523,7 @@ int mei_amthif_irq_read(struct mei_device *dev, s32 *slots);
  * NFC functions
  */
 int mei_nfc_host_init(struct mei_device *dev);
-void mei_nfc_host_exit(void);
+void mei_nfc_host_exit(struct mei_device *dev);
 
 /*
  * NFC Client UUID
@@ -626,9 +639,9 @@ static inline void mei_dbgfs_deregister(struct mei_device *dev) {}
 int mei_register(struct mei_device *dev);
 void mei_deregister(struct mei_device *dev);
 
-#define MEI_HDR_FMT "hdr:host=%02d me=%02d len=%d comp=%1d"
+#define MEI_HDR_FMT "hdr:host=%02d me=%02d len=%d internal=%1d comp=%1d"
 #define MEI_HDR_PRM(hdr)                  \
 	(hdr)->host_addr, (hdr)->me_addr, \
-	(hdr)->length, (hdr)->msg_complete
+	(hdr)->length, (hdr)->internal, (hdr)->msg_complete
 
 #endif
diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c
index 994ca4aff1a3..a58320c0c049 100644
--- a/drivers/misc/mei/nfc.c
+++ b/drivers/misc/mei/nfc.c
@@ -92,7 +92,7 @@ struct mei_nfc_hci_hdr {
  * @cl: NFC host client
  * @cl_info: NFC info host client
  * @init_work: perform connection to the info client
- * @fw_ivn: NFC Intervace Version Number
+ * @fw_ivn: NFC Interface Version Number
  * @vendor_id: NFC manufacturer ID
  * @radio_type: NFC radio type
  */
@@ -163,7 +163,7 @@ static int mei_nfc_build_bus_name(struct mei_nfc_dev *ndev)
 			return 0;
 
 		default:
-			dev_err(&dev->pdev->dev, "Unknow radio type 0x%x\n",
+			dev_err(&dev->pdev->dev, "Unknown radio type 0x%x\n",
 				ndev->radio_type);
 
 			return -EINVAL;
@@ -175,14 +175,14 @@ static int mei_nfc_build_bus_name(struct mei_nfc_dev *ndev)
 			ndev->bus_name = "pn544";
 			return 0;
 		default:
-			dev_err(&dev->pdev->dev, "Unknow radio type 0x%x\n",
+			dev_err(&dev->pdev->dev, "Unknown radio type 0x%x\n",
 				ndev->radio_type);
 
 			return -EINVAL;
 		}
 
 	default:
-		dev_err(&dev->pdev->dev, "Unknow vendor ID 0x%x\n",
+		dev_err(&dev->pdev->dev, "Unknown vendor ID 0x%x\n",
 			ndev->vendor_id);
 
 		return -EINVAL;
@@ -428,7 +428,7 @@ static void mei_nfc_init(struct work_struct *work)
 	mutex_unlock(&dev->device_lock);
 
 	if (mei_nfc_if_version(ndev) < 0) {
-		dev_err(&dev->pdev->dev, "Could not get the NFC interfave version");
+		dev_err(&dev->pdev->dev, "Could not get the NFC interface version");
 
 		goto err;
 	}
@@ -469,7 +469,9 @@ static void mei_nfc_init(struct work_struct *work)
 	return;
 
 err:
+	mutex_lock(&dev->device_lock);
 	mei_nfc_free(ndev);
+	mutex_unlock(&dev->device_lock);
 
 	return;
 }
@@ -481,7 +483,7 @@ int mei_nfc_host_init(struct mei_device *dev)
 	struct mei_cl *cl_info, *cl = NULL;
 	int i, ret;
 
-	/* already initialzed */
+	/* already initialized */
 	if (ndev->cl_info)
 		return 0;
 
@@ -547,12 +549,16 @@ err:
 	return ret;
 }
 
-void mei_nfc_host_exit(void)
+void mei_nfc_host_exit(struct mei_device *dev)
 {
 	struct mei_nfc_dev *ndev = &nfc_dev;
 
+	cancel_work_sync(&ndev->init_work);
+
+	mutex_lock(&dev->device_lock);
 	if (ndev->cl && ndev->cl->device)
 		mei_cl_remove_device(ndev->cl->device);
 
 	mei_nfc_free(ndev);
+	mutex_unlock(&dev->device_lock);
 }
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 2cab3c0a6805..8b1deea2abf6 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -144,6 +144,21 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_err(&pdev->dev, "failed to get pci regions.\n");
 		goto disable_device;
 	}
+
+	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) ||
+	    dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+		/* no 64-bit DMA: both 32-bit masks must be accepted */
+		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+		if (!err)
+			err = dma_set_coherent_mask(&pdev->dev,
+						    DMA_BIT_MASK(32));
+	}
+	if (err) {
+		dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
+		goto release_regions;
+	}
+
+
 	/* allocates and initializes the mei dev structure */
 	dev = mei_me_dev_init(pdev);
 	if (!dev) {
@@ -197,8 +212,8 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return 0;
 
 release_irq:
+	mei_cancel_work(dev);
 	mei_disable_interrupts(dev);
-	flush_scheduled_work();
 	free_irq(pdev->irq, dev);
 disable_msi:
 	pci_disable_msi(pdev);
diff --git a/drivers/misc/mei/wd.c b/drivers/misc/mei/wd.c
index 9e354216c163..f70945ed96f6 100644
--- a/drivers/misc/mei/wd.c
+++ b/drivers/misc/mei/wd.c
@@ -115,6 +115,7 @@ int mei_wd_send(struct mei_device *dev)
 	hdr.me_addr = dev->wd_cl.me_client_id;
 	hdr.msg_complete = 1;
 	hdr.reserved = 0;
+	hdr.internal = 0;
 
 	if (!memcmp(dev->wd_data, mei_start_wd_params, MEI_WD_HDR_SIZE))
 		hdr.length = MEI_WD_START_MSG_SIZE;
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
index 3574cc375bb9..b2da289320c9 100644
--- a/drivers/misc/mic/host/mic_device.h
+++ b/drivers/misc/mic/host/mic_device.h
@@ -134,6 +134,8 @@ struct mic_device {
  * @send_intr: Send an interrupt for a particular doorbell on the card.
  * @ack_interrupt: Hardware specific operations to ack the h/w on
  * receipt of an interrupt.
+ * @intr_workarounds: Hardware specific workarounds needed after
+ * handling an interrupt.
  * @reset: Reset the remote processor.
  * @reset_fw_ready: Reset firmware ready field.
  * @is_fw_ready: Check if firmware is ready for OS download.
@@ -149,6 +151,7 @@ struct mic_hw_ops {
 	void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val);
 	void (*send_intr)(struct mic_device *mdev, int doorbell);
 	u32 (*ack_interrupt)(struct mic_device *mdev);
+	void (*intr_workarounds)(struct mic_device *mdev);
 	void (*reset)(struct mic_device *mdev);
 	void (*reset_fw_ready)(struct mic_device *mdev);
 	bool (*is_fw_ready)(struct mic_device *mdev);
diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
index ad838c7651c4..c04a021e20c7 100644
--- a/drivers/misc/mic/host/mic_main.c
+++ b/drivers/misc/mic/host/mic_main.c
@@ -115,7 +115,7 @@ static irqreturn_t mic_shutdown_db(int irq, void *data)
 	struct mic_device *mdev = data;
 	struct mic_bootparam *bootparam = mdev->dp;
 
-	mdev->ops->ack_interrupt(mdev);
+	mdev->ops->intr_workarounds(mdev);
 
 	switch (bootparam->shutdown_status) {
 	case MIC_HALTED:
diff --git a/drivers/misc/mic/host/mic_virtio.c b/drivers/misc/mic/host/mic_virtio.c
index e04bb4fe6823..752ff873f891 100644
--- a/drivers/misc/mic/host/mic_virtio.c
+++ b/drivers/misc/mic/host/mic_virtio.c
@@ -369,7 +369,7 @@ static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
 	struct mic_vdev *mvdev = data;
 	struct mic_device *mdev = mvdev->mdev;
 
-	mdev->ops->ack_interrupt(mdev);
+	mdev->ops->intr_workarounds(mdev);
 	schedule_work(&mvdev->virtio_bh_work);
 	return IRQ_HANDLED;
 }
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
index 0dfa8a81436e..5562fdd3ef4e 100644
--- a/drivers/misc/mic/host/mic_x100.c
+++ b/drivers/misc/mic/host/mic_x100.c
@@ -174,35 +174,38 @@ static void mic_x100_send_intr(struct mic_device *mdev, int doorbell)
 }
 
 /**
- * mic_ack_interrupt - Device specific interrupt handling.
- * @mdev: pointer to mic_device instance
+ * mic_x100_ack_interrupt - Read the interrupt sources register and
+ * clear it. This function will be called in the MSI/INTx case.
+ * @mdev: Pointer to mic_device instance.
  *
- * Returns: bitmask of doorbell events triggered.
+ * Returns: bitmask of interrupt sources triggered.
  */
 static u32 mic_x100_ack_interrupt(struct mic_device *mdev)
 {
-	u32 reg = 0;
-	struct mic_mw *mw = &mdev->mmio;
 	u32 sicr0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICR0;
+	u32 reg = mic_mmio_read(&mdev->mmio, sicr0);
+	mic_mmio_write(&mdev->mmio, reg, sicr0);
+	return reg;
+}
+
+/**
+ * mic_x100_intr_workarounds - These hardware specific workarounds are
+ * to be invoked every time an interrupt is handled.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: none
+ */
+static void mic_x100_intr_workarounds(struct mic_device *mdev)
+{
+	struct mic_mw *mw = &mdev->mmio;
 
 	/* Clear pending bit array. */
 	if (MIC_A0_STEP == mdev->stepping)
 		mic_mmio_write(mw, 1, MIC_X100_SBOX_BASE_ADDRESS +
 			MIC_X100_SBOX_MSIXPBACR);
 
-	if (mdev->irq_info.num_vectors <= 1) {
-		reg = mic_mmio_read(mw, sicr0);
-
-		if (unlikely(!reg))
-			goto done;
-
-		mic_mmio_write(mw, reg, sicr0);
-	}
-
 	if (mdev->stepping >= MIC_B0_STEP)
 		mdev->intr_ops->enable_interrupts(mdev);
-done:
-	return reg;
 }
 
 /**
@@ -553,6 +556,7 @@ struct mic_hw_ops mic_x100_ops = {
 	.write_spad = mic_x100_write_spad,
 	.send_intr = mic_x100_send_intr,
 	.ack_interrupt = mic_x100_ack_interrupt,
+	.intr_workarounds = mic_x100_intr_workarounds,
 	.reset = mic_x100_hw_reset,
 	.reset_fw_ready = mic_x100_reset_fw_ready,
 	.is_fw_ready = mic_x100_is_fw_ready,
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 652593fc486d..128d5615c804 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -828,6 +828,7 @@ enum xp_retval
 xpc_allocate_msg_wait(struct xpc_channel *ch)
 {
 	enum xp_retval ret;
+	DEFINE_WAIT(wait);
 
 	if (ch->flags & XPC_C_DISCONNECTING) {
 		DBUG_ON(ch->reason == xpInterrupted);
@@ -835,7 +836,9 @@ xpc_allocate_msg_wait(struct xpc_channel *ch)
 	}
 
 	atomic_inc(&ch->n_on_msg_allocate_wq);
-	ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1);
+	prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE);
+	ret = schedule_timeout(1);
+	finish_wait(&ch->msg_allocate_wq, &wait);
 	atomic_dec(&ch->n_on_msg_allocate_wq);
 
 	if (ch->flags & XPC_C_DISCONNECTING) {
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 8d64b681dd93..3aed525e55b4 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -812,7 +812,7 @@ static void st_tty_flush_buffer(struct tty_struct *tty)
 	kfree_skb(st_gdata->tx_skb);
 	st_gdata->tx_skb = NULL;
 
-	tty->ops->flush_buffer(tty);
+	tty_driver_flush_buffer(tty);
 	return;
 }
 
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index 96853a09788a..9d3dbb28734b 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -531,7 +531,6 @@ long st_kim_stop(void *kim_data)
 		/* Flush any pending characters in the driver and discipline. */
 		tty_ldisc_flush(tty);
 		tty_driver_flush_buffer(tty);
-		tty->ops->flush_buffer(tty);
 	}
 
 	/* send uninstall notification to UIM */
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 963761526229..76ee7750bc5e 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -2600,8 +2600,6 @@ enum parport_pc_pci_cards {
 	syba_2p_epp,
 	syba_1p_ecp,
 	titan_010l,
-	titan_1284p1,
-	titan_1284p2,
 	avlab_1p,
 	avlab_2p,
 	oxsemi_952,
@@ -2660,8 +2658,6 @@ static struct parport_pc_pci {
 	/* syba_2p_epp AP138B */	{ 2, { { 0, 0x078 }, { 0, 0x178 }, } },
 	/* syba_1p_ecp W83787 */	{ 1, { { 0, 0x078 }, } },
 	/* titan_010l */		{ 1, { { 3, -1 }, } },
-	/* titan_1284p1 */              { 1, { { 0, 1 }, } },
-	/* titan_1284p2 */		{ 2, { { 0, 1 }, { 2, 3 }, } },
 	/* avlab_1p		*/	{ 1, { { 0, 1}, } },
 	/* avlab_2p		*/	{ 2, { { 0, 1}, { 2, 3 },} },
 	/* The Oxford Semi cards are unusual: 954 doesn't support ECP,
@@ -2677,8 +2673,8 @@ static struct parport_pc_pci {
 	/* netmos_9705 */               { 1, { { 0, -1 }, } },
 	/* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} },
 	/* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} },
-	/* netmos_9805 */               { 1, { { 0, -1 }, } },
-	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } },
+	/* netmos_9805 */		{ 1, { { 0, 1 }, } },
+	/* netmos_9815 */		{ 2, { { 0, 1 }, { 2, 3 }, } },
 	/* netmos_9901 */               { 1, { { 0, -1 }, } },
 	/* netmos_9865 */               { 1, { { 0, -1 }, } },
 	/* quatech_sppxp100 */		{ 1, { { 0, 1 }, } },
@@ -2722,8 +2718,6 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, syba_1p_ecp },
 	{ PCI_VENDOR_ID_TITAN, PCI_DEVICE_ID_TITAN_010L,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, titan_010l },
-	{ 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 },
-	{ 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 },
 	/* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/
 	/* AFAVLAB_TK9902 */
 	{ 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p},
@@ -2827,16 +2821,12 @@ static int parport_pc_pci_probe(struct pci_dev *dev,
 		if (irq == IRQ_NONE) {
 			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n",
-				parport_pc_pci_tbl[i + last_sio].vendor,
-				parport_pc_pci_tbl[i + last_sio].device,
-				io_lo, io_hi);
+				id->vendor, id->device, io_lo, io_hi);
 			irq = PARPORT_IRQ_NONE;
 		} else {
 			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n",
-				parport_pc_pci_tbl[i + last_sio].vendor,
-				parport_pc_pci_tbl[i + last_sio].device,
-				io_lo, io_hi, irq);
+				id->vendor, id->device, io_lo, io_hi, irq);
 		}
 		data->ports[count] =
 			parport_pc_probe_port(io_lo, io_hi, irq,
@@ -2866,8 +2856,6 @@ static void parport_pc_pci_remove(struct pci_dev *dev)
 	struct pci_parport_data *data = pci_get_drvdata(dev);
 	int i;
 
-	pci_set_drvdata(dev, NULL);
-
 	if (data) {
 		for (i = data->num - 1; i >= 0; i--)
 			parport_pc_unregister_port(data->ports[i]);
diff --git a/drivers/pcmcia/bfin_cf_pcmcia.c b/drivers/pcmcia/bfin_cf_pcmcia.c
index ed3b522601b3..971991bab975 100644
--- a/drivers/pcmcia/bfin_cf_pcmcia.c
+++ b/drivers/pcmcia/bfin_cf_pcmcia.c
@@ -303,7 +303,7 @@ static int bfin_cf_remove(struct platform_device *pdev)
 
 static struct platform_driver bfin_cf_driver = {
 	.driver = {
-		   .name = (char *)driver_name,
+		   .name = driver_name,
 		   .owner = THIS_MODULE,
 		   },
 	.probe = bfin_cf_probe,
diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c
index 1b206eac5f93..5ea64d0f61ab 100644
--- a/drivers/pcmcia/electra_cf.c
+++ b/drivers/pcmcia/electra_cf.c
@@ -359,7 +359,7 @@ MODULE_DEVICE_TABLE(of, electra_cf_match);
 
 static struct platform_driver electra_cf_driver = {
 	.driver = {
-		.name = (char *)driver_name,
+		.name = driver_name,
 		.owner = THIS_MODULE,
 		.of_match_table = electra_cf_match,
 	},
diff --git a/drivers/pinctrl/pinctrl-baytrail.c b/drivers/pinctrl/pinctrl-baytrail.c
index 2832576d8b12..114f5ef4b73a 100644
--- a/drivers/pinctrl/pinctrl-baytrail.c
+++ b/drivers/pinctrl/pinctrl-baytrail.c
@@ -512,6 +512,7 @@ static const struct dev_pm_ops byt_gpio_pm_ops = {
 
 static const struct acpi_device_id byt_gpio_acpi_match[] = {
 	{ "INT33B2", 0 },
+	{ "INT33FC", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, byt_gpio_acpi_match);
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 2a786c504460..3c6768378a94 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -833,6 +833,11 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
 	return 0;
 }
 
+static const struct x86_cpu_id energy_unit_quirk_ids[] = {
+	{ X86_VENDOR_INTEL, 6, 0x37},/* VLV */
+	{}
+};
+
 static int rapl_check_unit(struct rapl_package *rp, int cpu)
 {
 	u64 msr_val;
@@ -853,8 +858,11 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu)
 	 * time unit: 1/time_unit_divisor Seconds
 	 */
 	value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
-	rp->energy_unit_divisor = 1 << value;
-
+	/* some CPUs have different way to calculate energy unit */
+	if (x86_match_cpu(energy_unit_quirk_ids))
+		rp->energy_unit_divisor = 1000000 / (1 << value);
+	else
+		rp->energy_unit_divisor = 1 << value;
 
 	value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
 	rp->power_unit_divisor = 1 << value;
@@ -941,6 +949,7 @@ static void package_power_limit_irq_restore(int package_id)
 static const struct x86_cpu_id rapl_ids[] = {
 	{ X86_VENDOR_INTEL, 6, 0x2a},/* SNB */
 	{ X86_VENDOR_INTEL, 6, 0x2d},/* SNB EP */
+	{ X86_VENDOR_INTEL, 6, 0x37},/* VLV */
 	{ X86_VENDOR_INTEL, 6, 0x3a},/* IVB */
 	{ X86_VENDOR_INTEL, 6, 0x45},/* HSW */
 	/* TODO: Add more CPU IDs after testing */
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index f7beb6eb40c7..a673e5b6a2e0 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -847,7 +847,7 @@ int __uio_register_device(struct module *owner,
 	info->uio_dev = idev;
 
 	if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
-		ret = devm_request_irq(parent, info->irq, uio_interrupt,
+		ret = devm_request_irq(idev->dev, info->irq, uio_interrupt,
 				  info->irq_flags, info->name, idev);
 		if (ret)
 			goto err_request_irq;
diff --git a/drivers/uio/uio_mf624.c b/drivers/uio/uio_mf624.c
index f764adbfe036..d1f95a1567bb 100644
--- a/drivers/uio/uio_mf624.c
+++ b/drivers/uio/uio_mf624.c
@@ -228,7 +228,7 @@ static void mf624_pci_remove(struct pci_dev *dev)
 	kfree(info);
 }
 
-static DEFINE_PCI_DEVICE_TABLE(mf624_pci_id) = {
+static const struct pci_device_id mf624_pci_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_HUMUSOFT, PCI_DEVICE_ID_MF624) },
 	{ 0, }
 };
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c444654fc33f..5c4a95b516cf 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -285,7 +285,7 @@ static void update_balloon_size(struct virtio_balloon *vb)
 {
 	__le32 actual = cpu_to_le32(vb->num_pages);
 
-	virtio_cwrite(vb->vdev, struct virtio_balloon_config, num_pages,
+	virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
 		      &actual);
 }
 
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
index 15c7251b0556..1e5d94c5afc9 100644
--- a/drivers/w1/masters/mxc_w1.c
+++ b/drivers/w1/masters/mxc_w1.c
@@ -46,7 +46,6 @@
 
 struct mxc_w1_device {
 	void __iomem *regs;
-	unsigned int clkdiv;
 	struct clk *clk;
 	struct w1_bus_master bus_master;
 };
@@ -106,8 +105,10 @@ static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit)
 static int mxc_w1_probe(struct platform_device *pdev)
 {
 	struct mxc_w1_device *mdev;
+	unsigned long clkrate;
 	struct resource *res;
-	int err = 0;
+	unsigned int clkdiv;
+	int err;
 
 	mdev = devm_kzalloc(&pdev->dev, sizeof(struct mxc_w1_device),
 			    GFP_KERNEL);
@@ -118,27 +119,39 @@ static int mxc_w1_probe(struct platform_device *pdev)
 	if (IS_ERR(mdev->clk))
 		return PTR_ERR(mdev->clk);
 
-	mdev->clkdiv = (clk_get_rate(mdev->clk) / 1000000) - 1;
+	clkrate = clk_get_rate(mdev->clk);
+	if (clkrate < 10000000)
+		dev_warn(&pdev->dev,
+			 "Low clock frequency causes improper function\n");
+
+	clkdiv = DIV_ROUND_CLOSEST(clkrate, 1000000);
+	clkrate /= clkdiv;
+	if ((clkrate < 980000) || (clkrate > 1020000))
+		dev_warn(&pdev->dev,
+			 "Incorrect time base frequency %lu Hz\n", clkrate);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mdev->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(mdev->regs))
 		return PTR_ERR(mdev->regs);
 
-	clk_prepare_enable(mdev->clk);
-	__raw_writeb(mdev->clkdiv, mdev->regs + MXC_W1_TIME_DIVIDER);
+	err = clk_prepare_enable(mdev->clk);
+	if (err)
+		return err;
+
+	__raw_writeb(clkdiv - 1, mdev->regs + MXC_W1_TIME_DIVIDER);
 
 	mdev->bus_master.data = mdev;
 	mdev->bus_master.reset_bus = mxc_w1_ds2_reset_bus;
 	mdev->bus_master.touch_bit = mxc_w1_ds2_touch_bit;
 
-	err = w1_add_master_device(&mdev->bus_master);
+	platform_set_drvdata(pdev, mdev);
 
+	err = w1_add_master_device(&mdev->bus_master);
 	if (err)
-		return err;
+		clk_disable_unprepare(mdev->clk);
 
-	platform_set_drvdata(pdev, mdev);
-	return 0;
+	return err;
 }
 
 /*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 288534920fe5..20d6697bd638 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 				sb->s_blocksize - offset : towrite;
 
 		tmp_bh.b_state = 0;
+		tmp_bh.b_size = sb->s_blocksize;
 		err = ext2_get_block(inode, blk, &tmp_bh, 1);
 		if (err < 0)
 			goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e6185031c1cc..ece55565b9cd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -268,6 +268,16 @@ struct ext4_io_submit {
 /* Translate # of blks to # of clusters */
 #define EXT4_NUM_B2C(sbi, blks)	(((blks) + (sbi)->s_cluster_ratio - 1) >> \
 				 (sbi)->s_cluster_bits)
+/* Mask out the low bits to get the starting block of the cluster */
+#define EXT4_PBLK_CMASK(s, pblk) ((pblk) &				\
+				  ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_CMASK(s, lblk) ((lblk) &				\
+				  ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
+/* Get the cluster offset */
+#define EXT4_PBLK_COFF(s, pblk) ((pblk) &				\
+				 ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_COFF(s, lblk) ((lblk) &				\
+				 ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
 
 /*
  * Structure of a blocks group descriptor
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 17ac112ab101..3fe29de832c8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 		if (WARN_ON_ONCE(err)) {
 			ext4_journal_abort_handle(where, line, __func__, bh,
 						  handle, err);
+			ext4_error_inode(inode, where, line,
+					 bh->b_blocknr,
+					 "journal_dirty_metadata failed: "
+					 "handle type %u started at line %u, "
+					 "credits %u/%u, errcode %d",
+					 handle->h_type,
+					 handle->h_line_no,
+					 handle->h_requested_credits,
+					 handle->h_buffer_credits, err);
 		}
 	} else {
 		if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 35f65cf4f318..4410cc3d6ee2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
 	ext4_fsblk_t block = ext4_ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
+	ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+	ext4_lblk_t last = lblock + len - 1;
 
-	if (len == 0)
+	if (lblock > last)
 		return 0;
 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
 	if (depth == 0) {
 		/* leaf entries */
 		struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+		struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+		ext4_fsblk_t pblock = 0;
+		ext4_lblk_t lblock = 0;
+		ext4_lblk_t prev = 0;
+		int len = 0;
 		while (entries) {
 			if (!ext4_valid_extent(inode, ext))
 				return 0;
+
+			/* Check for overlapping extents */
+			lblock = le32_to_cpu(ext->ee_block);
+			len = ext4_ext_get_actual_len(ext);
+			if ((lblock <= prev) && prev) {
+				pblock = ext4_ext_pblock(ext);
+				es->s_last_error_block = cpu_to_le64(pblock);
+				return 0;
+			}
 			ext++;
 			entries--;
+			prev = lblock + len - 1;
 		}
 	} else {
 		struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
 	depth = ext_depth(inode);
 	if (!path[depth].p_ext)
 		goto out;
-	b2 = le32_to_cpu(path[depth].p_ext->ee_block);
-	b2 &= ~(sbi->s_cluster_ratio - 1);
+	b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
 
 	/*
 	 * get the next allocated block if the extent in the path
@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
 		b2 = ext4_ext_next_allocated_block(path);
 		if (b2 == EXT_MAX_BLOCKS)
 			goto out;
-		b2 &= ~(sbi->s_cluster_ratio - 1);
+		b2 = EXT4_LBLK_CMASK(sbi, b2);
 	}
 
 	/* check for wrap through zero on extent logical start block*/
@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		 * extent, we have to mark the cluster as used (store negative
 		 * cluster number in partial_cluster).
 		 */
-		unaligned = pblk & (sbi->s_cluster_ratio - 1);
+		unaligned = EXT4_PBLK_COFF(sbi, pblk);
 		if (unaligned && (ee_len == num) &&
 		    (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
 			*partial_cluster = EXT4_B2C(sbi, pblk);
@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			 * accidentally freeing it later on
 			 */
 			pblk = ext4_ext_pblock(ex);
-			if (pblk & (sbi->s_cluster_ratio - 1))
+			if (EXT4_PBLK_COFF(sbi, pblk))
 				*partial_cluster =
 					-((long long)EXT4_B2C(sbi, pblk));
 			ex--;
@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_lblk_t lblk_start, lblk_end;
-	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+	lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
 	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
 
 	return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 	trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
 
 	/* Check towards left side */
-	c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+	c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
 	if (c_offset) {
-		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+		lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
 		lblk_to = lblk_from + c_offset - 1;
 
 		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 	}
 
 	/* Now check towards right. */
-	c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+	c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
 	if (allocated_clusters && c_offset) {
 		lblk_from = lblk_start + num_blks;
 		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
 				     struct ext4_ext_path *path)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+	ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
 	ext4_lblk_t ex_cluster_start, ex_cluster_end;
 	ext4_lblk_t rr_cluster_start;
 	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
 	    (rr_cluster_start == ex_cluster_start)) {
 		if (rr_cluster_start == ex_cluster_end)
 			ee_start += ee_len - 1;
-		map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
-			c_offset;
+		map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
 		map->m_len = min(map->m_len,
 				 (unsigned) sbi->s_cluster_ratio - c_offset);
 		/*
@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	 */
 	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
 	newex.ee_block = cpu_to_le32(map->m_lblk);
-	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+	cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
 
 	/*
 	 * If we are doing bigalloc, check to see if the extent returned
@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	 * needed so that future calls to get_implied_cluster_alloc()
 	 * work correctly.
 	 */
-	offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+	offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
 	ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
 	ar.goal -= offset;
 	ar.logical -= offset;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 075763474118..61d49ff22c81 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file,
  */
 static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
 {
-	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int md_needed;
@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
 	 * in order to allocate nrblocks
 	 * worse case is one extent per block
 	 */
-repeat:
 	spin_lock(&ei->i_block_reservation_lock);
 	/*
 	 * ext4_calc_metadata_amount() has side effects, which we have
@@ -1238,10 +1236,6 @@ repeat:
 		ei->i_da_metadata_calc_len = save_len;
 		ei->i_da_metadata_calc_last_lblock = save_last_lblock;
 		spin_unlock(&ei->i_block_reservation_lock);
-		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
-			cond_resched();
-			goto repeat;
-		}
 		return -ENOSPC;
 	}
 	ei->i_reserved_meta_blocks += md_needed;
@@ -1255,7 +1249,6 @@ repeat:
  */
 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 {
-	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int md_needed;
@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 	 * in order to allocate nrblocks
 	 * worse case is one extent per block
 	 */
-repeat:
 	spin_lock(&ei->i_block_reservation_lock);
 	/*
 	 * ext4_calc_metadata_amount() has side effects, which we have
@@ -1297,10 +1289,6 @@ repeat:
 		ei->i_da_metadata_calc_len = save_len;
 		ei->i_da_metadata_calc_last_lblock = save_last_lblock;
 		spin_unlock(&ei->i_block_reservation_lock);
-		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
-			cond_resched();
-			goto repeat;
-		}
 		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
 		return -ENOSPC;
 	}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4d113efa024c..04a5c7504be9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
 {
 	struct ext4_prealloc_space *pa;
 	pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+
+	BUG_ON(atomic_read(&pa->pa_count));
+	BUG_ON(pa->pa_deleted == 0);
 	kmem_cache_free(ext4_pspace_cachep, pa);
 }
 
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 	ext4_group_t grp;
 	ext4_fsblk_t grp_blk;
 
-	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
-		return;
-
 	/* in this short window concurrent discard can set pa_deleted */
 	spin_lock(&pa->pa_lock);
+	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
+		spin_unlock(&pa->pa_lock);
+		return;
+	}
+
 	if (pa->pa_deleted == 1) {
 		spin_unlock(&pa->pa_lock);
 		return;
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ext4_get_group_no_and_offset(sb, goal, &group, &block);
 
 	/* set up allocation goals */
-	ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
+	ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
 	ac->ac_status = AC_STATUS_CONTINUE;
 	ac->ac_sb = sb;
 	ac->ac_inode = ar->inode;
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	 * blocks at the beginning or the end unless we are explicitly
 	 * requested to avoid doing so.
 	 */
-	overflow = block & (sbi->s_cluster_ratio - 1);
+	overflow = EXT4_PBLK_COFF(sbi, block);
 	if (overflow) {
 		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
 			overflow = sbi->s_cluster_ratio - overflow;
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			count += overflow;
 		}
 	}
-	overflow = count & (sbi->s_cluster_ratio - 1);
+	overflow = EXT4_LBLK_COFF(sbi, count);
 	if (overflow) {
 		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
 			if (count > overflow)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c977f4e4e63b..1f7784de05b6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb)
 	}
 
 	ext4_es_unregister_shrinker(sbi);
-	del_timer(&sbi->s_err_report);
+	del_timer_sync(&sbi->s_err_report);
 	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
@@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb)
 }
 
 
-static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
 {
 	ext4_fsblk_t resv_clusters;
 
 	/*
+	 * There's no need to reserve anything when we aren't using extents.
+	 * The space estimates are exact, there are no unwritten extents,
+	 * hole punching doesn't need new metadata... This is needed especially
+	 * to keep ext2/3 backward compatibility.
+	 */
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+		return 0;
+	/*
 	 * By default we reserve 2% or 4096 clusters, whichever is smaller.
 	 * This should cover the situations where we can not afford to run
 	 * out of space like for example punch hole, or converting
@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
 	 * allocation would require 1, or 2 blocks, higher numbers are
 	 * very rare.
 	 */
-	resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+	resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
+			EXT4_SB(sb)->s_cluster_bits;
 
 	do_div(resv_clusters, 50);
 	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -4071,10 +4080,10 @@ no_journal:
 			 "available");
 	}
 
-	err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+	err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
-			 "reserved pool", ext4_calculate_resv_clusters(sbi));
+			 "reserved pool", ext4_calculate_resv_clusters(sb));
 		goto failed_mount4a;
 	}
 
@@ -4184,7 +4193,7 @@ failed_mount_wq:
 	}
 failed_mount3:
 	ext4_es_unregister_shrinker(sbi);
-	del_timer(&sbi->s_err_report);
+	del_timer_sync(&sbi->s_err_report);
 	if (sbi->s_flex_groups)
 		ext4_kvfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeclusters_counter);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 52032647dd4a..5fa344afb49a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 	read_lock(&journal->j_state_lock);
 #ifdef CONFIG_JBD2_DEBUG
 	if (!tid_geq(journal->j_commit_request, tid)) {
-		printk(KERN_EMERG
+		printk(KERN_ERR
 		       "%s: error: j_commit_request=%d, tid=%d\n",
 		       __func__, journal->j_commit_request, tid);
 	}
@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 	}
 	read_unlock(&journal->j_state_lock);
 
-	if (unlikely(is_journal_aborted(journal))) {
-		printk(KERN_EMERG "journal commit I/O error\n");
+	if (unlikely(is_journal_aborted(journal)))
 		err = -EIO;
-	}
 	return err;
 }
 
@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal)
 	if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
 	    JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
 		/* Can't have checksum v1 and v2 on at the same time! */
-		printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 "
+		printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
 		       "at the same time!\n");
 		goto out;
 	}
 
 	if (!jbd2_verify_csum_type(journal, sb)) {
-		printk(KERN_ERR "JBD: Unknown checksum type\n");
+		printk(KERN_ERR "JBD2: Unknown checksum type\n");
 		goto out;
 	}
 
@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal)
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
 		journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
 		if (IS_ERR(journal->j_chksum_driver)) {
-			printk(KERN_ERR "JBD: Cannot load crc32c driver.\n");
+			printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
 			err = PTR_ERR(journal->j_chksum_driver);
 			journal->j_chksum_driver = NULL;
 			goto out;
@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal)
 
 	/* Check superblock checksum */
 	if (!jbd2_superblock_csum_verify(journal, sb)) {
-		printk(KERN_ERR "JBD: journal checksum error\n");
+		printk(KERN_ERR "JBD2: journal checksum error\n");
 		goto out;
 	}
 
@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 			journal->j_chksum_driver = crypto_alloc_shash("crc32c",
 								      0, 0);
 			if (IS_ERR(journal->j_chksum_driver)) {
-				printk(KERN_ERR "JBD: Cannot load crc32c "
+				printk(KERN_ERR "JBD2: Cannot load crc32c "
 				       "driver.\n");
 				journal->j_chksum_driver = NULL;
 				return 0;
@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void)
 #ifdef CONFIG_JBD2_DEBUG
 	int n = atomic_read(&nr_journal_heads);
 	if (n)
-		printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
+		printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
 #endif
 	jbd2_remove_jbd_stats_proc_entry();
 	jbd2_journal_destroy_caches();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3929c50428b1..3b6bb19d60b1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal,
 						be32_to_cpu(tmp->h_sequence))) {
 						brelse(obh);
 						success = -EIO;
-						printk(KERN_ERR "JBD: Invalid "
+						printk(KERN_ERR "JBD2: Invalid "
 						       "checksum recovering "
 						       "block %llu in log\n",
 						       blocknr);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7aa9a32573bb..8360674c85bc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -932,7 +932,7 @@ repeat:
 					jbd2_alloc(jh2bh(jh)->b_size,
 							 GFP_NOFS);
 				if (!frozen_buffer) {
-					printk(KERN_EMERG
+					printk(KERN_ERR
 					       "%s: OOM for frozen_buffer\n",
 					       __func__);
 					JBUFFER_TRACE(jh, "oom!");
@@ -1166,7 +1166,7 @@ repeat:
 	if (!jh->b_committed_data) {
 		committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
-			printk(KERN_EMERG "%s: No memory for committed data\n",
+			printk(KERN_ERR "%s: No memory for committed data\n",
 				__func__);
 			err = -ENOMEM;
 			goto out;
@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		 * once a transaction -bzzz
 		 */
 		jh->b_modified = 1;
-		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+		if (handle->h_buffer_credits <= 0) {
+			ret = -ENOSPC;
+			goto out_unlock_bh;
+		}
 		handle->h_buffer_credits--;
 	}
 
@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		JBUFFER_TRACE(jh, "fastpath");
 		if (unlikely(jh->b_transaction !=
 			     journal->j_running_transaction)) {
-			printk(KERN_EMERG "JBD: %s: "
+			printk(KERN_ERR "JBD2: %s: "
 			       "jh->b_transaction (%llu, %p, %u) != "
 			       "journal->j_running_transaction (%p, %u)",
 			       journal->j_devname,
@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		JBUFFER_TRACE(jh, "already on other transaction");
 		if (unlikely(jh->b_transaction !=
 			     journal->j_committing_transaction)) {
-			printk(KERN_EMERG "JBD: %s: "
+			printk(KERN_ERR "JBD2: %s: "
 			       "jh->b_transaction (%llu, %p, %u) != "
 			       "journal->j_committing_transaction (%p, %u)",
 			       journal->j_devname,
@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 			ret = -EINVAL;
 		}
 		if (unlikely(jh->b_next_transaction != transaction)) {
-			printk(KERN_EMERG "JBD: %s: "
+			printk(KERN_ERR "JBD2: %s: "
 			       "jh->b_next_transaction (%llu, %p, %u) != "
 			       "transaction (%p, %u)",
 			       journal->j_devname,
@@ -1373,7 +1376,6 @@ out_unlock_bh:
 	jbd2_journal_put_journal_head(jh);
 out:
 	JBUFFER_TRACE(jh, "exit");
-	WARN_ON(ret);	/* All errors are bugs, so dump the stack */
 	return ret;
 }
 
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index 669fef5c745a..3e0fbe441763 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -3,6 +3,6 @@
 
 #include <uapi/linux/auxvec.h>
 
-#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 20 /* NEW_AUX_ENT entries in auxiliary table */
   /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
 #endif /* _LINUX_AUXVEC_H */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0e23c26485f4..9b503376738f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -418,6 +418,7 @@ enum {
 	ATA_HORKAGE_DUMP_ID	= (1 << 16),	/* dump IDENTIFY data */
 	ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17),	/* Set max sects to 65535 */
 	ATA_HORKAGE_ATAPI_DMADIR = (1 << 18),	/* device requires dmadir */
+	ATA_HORKAGE_NO_NCQ_TRIM	= (1 << 19),	/* don't use queued TRIM */
 
 	 /* DMA mask for user DMA control: User visible values; DO NOT
 	    renumber */
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 57e890abe1f0..a5fc7d01aad6 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -69,6 +69,7 @@
 	__PCPU_DUMMY_ATTRS char __pcpu_scope_##name;			\
 	extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;		\
 	__PCPU_DUMMY_ATTRS char __pcpu_unique_##name;			\
+	extern __PCPU_ATTRS(sec) __typeof__(type) name;			\
 	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak			\
 	__typeof__(type) name
 #else
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 979874c627ee..61e1935c91b1 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -978,7 +978,7 @@ struct ib_uobject {
 };
 
 struct ib_udata {
-	void __user *inbuf;
+	const void __user *inbuf;
 	void __user *outbuf;
 	size_t       inlen;
 	size_t       outlen;
diff --git a/include/uapi/linux/genwqe/genwqe_card.h b/include/uapi/linux/genwqe/genwqe_card.h
new file mode 100644
index 000000000000..795e957bb840
--- /dev/null
+++ b/include/uapi/linux/genwqe/genwqe_card.h
@@ -0,0 +1,500 @@
+#ifndef __GENWQE_CARD_H__
+#define __GENWQE_CARD_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * User-space API for the GenWQE card. For debugging and test purposes
+ * the register addresses are included here too.
+ */
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* Basename of sysfs, debugfs and /dev interfaces */
+#define GENWQE_DEVNAME			"genwqe"
+
+#define GENWQE_TYPE_ALTERA_230		0x00 /* GenWQE4 Stratix-IV-230 */
+#define GENWQE_TYPE_ALTERA_530		0x01 /* GenWQE4 Stratix-IV-530 */
+#define GENWQE_TYPE_ALTERA_A4		0x02 /* GenWQE5 A4 Stratix-V-A4 */
+#define GENWQE_TYPE_ALTERA_A7		0x03 /* GenWQE5 A7 Stratix-V-A7 */
+
+/* MMIO Unit offsets: Each UnitID occupies a defined address range */
+#define GENWQE_UID_OFFS(uid)		((uid) << 24)
+#define GENWQE_SLU_OFFS			GENWQE_UID_OFFS(0)
+#define GENWQE_HSU_OFFS			GENWQE_UID_OFFS(1)
+#define GENWQE_APP_OFFS			GENWQE_UID_OFFS(2)
+#define GENWQE_MAX_UNITS		3
+
+/* Common offsets per UnitID */
+#define IO_EXTENDED_ERROR_POINTER	0x00000048
+#define IO_ERROR_INJECT_SELECTOR	0x00000060
+#define IO_EXTENDED_DIAG_SELECTOR	0x00000070
+#define IO_EXTENDED_DIAG_READ_MBX	0x00000078
+#define IO_EXTENDED_DIAG_MAP(ring)	(0x00000500 | ((ring) << 3))
+
+#define GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace) (((ring) << 8) | (trace))
+
+/* UnitID 0: Service Layer Unit (SLU) */
+
+/* SLU: Unit Configuration Register */
+#define IO_SLU_UNITCFG			0x00000000
+#define IO_SLU_UNITCFG_TYPE_MASK	0x000000000ff00000 /* 27:20 */
+
+/* SLU: Fault Isolation Register (FIR) (ac_slu_fir) */
+#define IO_SLU_FIR			0x00000008 /* read only, wr direct */
+#define IO_SLU_FIR_CLR			0x00000010 /* read and clear */
+
+/* SLU: First Error Capture Register (FEC/WOF) */
+#define IO_SLU_FEC			0x00000018
+
+#define IO_SLU_ERR_ACT_MASK		0x00000020
+#define IO_SLU_ERR_ATTN_MASK		0x00000028
+#define IO_SLU_FIRX1_ACT_MASK		0x00000030
+#define IO_SLU_FIRX0_ACT_MASK		0x00000038
+#define IO_SLU_SEC_LEM_DEBUG_OVR	0x00000040
+#define IO_SLU_EXTENDED_ERR_PTR		0x00000048
+#define IO_SLU_COMMON_CONFIG		0x00000060
+
+#define IO_SLU_FLASH_FIR		0x00000108
+#define IO_SLU_SLC_FIR			0x00000110
+#define IO_SLU_RIU_TRAP			0x00000280
+#define IO_SLU_FLASH_FEC		0x00000308
+#define IO_SLU_SLC_FEC			0x00000310
+
+/*
+ * The  Virtual Function's Access is from offset 0x00010000
+ * The Physical Function's Access is from offset 0x00050000
+ * Single Shared Registers exists only at offset 0x00060000
+ *
+ * SLC: Queue Virtual Window for accessing into a specific VF
+ * queue. When accessing the 0x10000 space using the 0x50000 address
+ * segment, the value indicated here is used to specify which VF
+ * register is decoded. This register, and the 0x50000 register space
+ * can only be accessed by the PF. Example, if this register is set to
+ * 0x2, then a read from 0x50000 is the same as a read from 0x10000
+ * from VF=2.
+ */
+
+/* SLC: Queue Segment */
+#define IO_SLC_QUEUE_SEGMENT		0x00010000
+#define IO_SLC_VF_QUEUE_SEGMENT		0x00050000
+
+/* SLC: Queue Offset */
+#define IO_SLC_QUEUE_OFFSET		0x00010008
+#define IO_SLC_VF_QUEUE_OFFSET		0x00050008
+
+/* SLC: Queue Configuration */
+#define IO_SLC_QUEUE_CONFIG		0x00010010
+#define IO_SLC_VF_QUEUE_CONFIG		0x00050010
+
+/* SLC: Job Timeout/Only accessible for the PF */
+#define IO_SLC_APPJOB_TIMEOUT		0x00010018
+#define IO_SLC_VF_APPJOB_TIMEOUT	0x00050018
+#define TIMEOUT_250MS			0x0000000f
+#define HEARTBEAT_DISABLE		0x0000ff00
+
+/* SLC: Queue InitSequence Register */
+#define	IO_SLC_QUEUE_INITSQN		0x00010020
+#define	IO_SLC_VF_QUEUE_INITSQN		0x00050020
+
+/* SLC: Queue Wrap */
+#define IO_SLC_QUEUE_WRAP		0x00010028
+#define IO_SLC_VF_QUEUE_WRAP		0x00050028
+
+/* SLC: Queue Status */
+#define IO_SLC_QUEUE_STATUS		0x00010100
+#define IO_SLC_VF_QUEUE_STATUS		0x00050100
+
+/* SLC: Queue Working Time */
+#define IO_SLC_QUEUE_WTIME		0x00010030
+#define IO_SLC_VF_QUEUE_WTIME		0x00050030
+
+/* SLC: Queue Error Counts */
+#define IO_SLC_QUEUE_ERRCNTS		0x00010038
+#define IO_SLC_VF_QUEUE_ERRCNTS		0x00050038
+
+/* SLC: Queue Last Response Word */
+#define IO_SLC_QUEUE_LRW		0x00010040
+#define IO_SLC_VF_QUEUE_LRW		0x00050040
+
+/* SLC: Freerunning Timer */
+#define IO_SLC_FREE_RUNNING_TIMER	0x00010108
+#define IO_SLC_VF_FREE_RUNNING_TIMER	0x00050108
+
+/* SLC: Queue Virtual Access Region */
+#define IO_PF_SLC_VIRTUAL_REGION	0x00050000
+
+/* SLC: Queue Virtual Window */
+#define IO_PF_SLC_VIRTUAL_WINDOW	0x00060000
+
+/* SLC: DDCB Application Job Pending [n] (n=0:63) */
+#define IO_PF_SLC_JOBPEND(n)		(0x00061000 + 8*(n))
+#define IO_SLC_JOBPEND(n)		IO_PF_SLC_JOBPEND(n)
+
+/* SLC: Parser Trap RAM [n] (n=0:31) */
+#define IO_SLU_SLC_PARSE_TRAP(n)	(0x00011000 + 8*(n))
+
+/* SLC: Dispatcher Trap RAM [n] (n=0:31) */
+#define IO_SLU_SLC_DISP_TRAP(n)	(0x00011200 + 8*(n))
+
+/* Global Fault Isolation Register (GFIR) */
+#define IO_SLC_CFGREG_GFIR		0x00020000
+#define GFIR_ERR_TRIGGER		0x0000ffff
+
+/* SLU: Soft Reset Register */
+#define IO_SLC_CFGREG_SOFTRESET		0x00020018
+
+/* SLU: Misc Debug Register */
+#define IO_SLC_MISC_DEBUG		0x00020060
+#define IO_SLC_MISC_DEBUG_CLR		0x00020068
+#define IO_SLC_MISC_DEBUG_SET		0x00020070
+
+/* Temperature Sensor Reading */
+#define IO_SLU_TEMPERATURE_SENSOR	0x00030000
+#define IO_SLU_TEMPERATURE_CONFIG	0x00030008
+
+/* Voltage Margining Control */
+#define IO_SLU_VOLTAGE_CONTROL		0x00030080
+#define IO_SLU_VOLTAGE_NOMINAL		0x00000000
+#define IO_SLU_VOLTAGE_DOWN5		0x00000006
+#define IO_SLU_VOLTAGE_UP5		0x00000007
+
+/* Direct LED Control Register */
+#define IO_SLU_LEDCONTROL		0x00030100
+
+/* SLU: Flashbus Direct Access -A5 */
+#define IO_SLU_FLASH_DIRECTACCESS	0x00040010
+
+/* SLU: Flashbus Direct Access2 -A5 */
+#define IO_SLU_FLASH_DIRECTACCESS2	0x00040020
+
+/* SLU: Flashbus Command Interface -A5 */
+#define IO_SLU_FLASH_CMDINTF		0x00040030
+
+/* SLU: BitStream Loaded */
+#define IO_SLU_BITSTREAM		0x00040040
+
+/* This Register has a switch which will change the CAs to UR */
+#define IO_HSU_ERR_BEHAVIOR		0x01001010
+
+#define IO_SLC2_SQB_TRAP		0x00062000
+#define IO_SLC2_QUEUE_MANAGER_TRAP	0x00062008
+#define IO_SLC2_FLS_MASTER_TRAP		0x00062010
+
+/* UnitID 1: HSU Registers */
+#define IO_HSU_UNITCFG			0x01000000
+#define IO_HSU_FIR			0x01000008
+#define IO_HSU_FIR_CLR			0x01000010
+#define IO_HSU_FEC			0x01000018
+#define IO_HSU_ERR_ACT_MASK		0x01000020
+#define IO_HSU_ERR_ATTN_MASK		0x01000028
+#define IO_HSU_FIRX1_ACT_MASK		0x01000030
+#define IO_HSU_FIRX0_ACT_MASK		0x01000038
+#define IO_HSU_SEC_LEM_DEBUG_OVR	0x01000040
+#define IO_HSU_EXTENDED_ERR_PTR		0x01000048
+#define IO_HSU_COMMON_CONFIG		0x01000060
+
+/* UnitID 2: Application Unit (APP) */
+#define IO_APP_UNITCFG			0x02000000
+#define IO_APP_FIR			0x02000008
+#define IO_APP_FIR_CLR			0x02000010
+#define IO_APP_FEC			0x02000018
+#define IO_APP_ERR_ACT_MASK		0x02000020
+#define IO_APP_ERR_ATTN_MASK		0x02000028
+#define IO_APP_FIRX1_ACT_MASK		0x02000030
+#define IO_APP_FIRX0_ACT_MASK		0x02000038
+#define IO_APP_SEC_LEM_DEBUG_OVR	0x02000040
+#define IO_APP_EXTENDED_ERR_PTR		0x02000048
+#define IO_APP_COMMON_CONFIG		0x02000060
+
+#define IO_APP_DEBUG_REG_01		0x02010000
+#define IO_APP_DEBUG_REG_02		0x02010008
+#define IO_APP_DEBUG_REG_03		0x02010010
+#define IO_APP_DEBUG_REG_04		0x02010018
+#define IO_APP_DEBUG_REG_05		0x02010020
+#define IO_APP_DEBUG_REG_06		0x02010028
+#define IO_APP_DEBUG_REG_07		0x02010030
+#define IO_APP_DEBUG_REG_08		0x02010038
+#define IO_APP_DEBUG_REG_09		0x02010040
+#define IO_APP_DEBUG_REG_10		0x02010048
+#define IO_APP_DEBUG_REG_11		0x02010050
+#define IO_APP_DEBUG_REG_12		0x02010058
+#define IO_APP_DEBUG_REG_13		0x02010060
+#define IO_APP_DEBUG_REG_14		0x02010068
+#define IO_APP_DEBUG_REG_15		0x02010070
+#define IO_APP_DEBUG_REG_16		0x02010078
+#define IO_APP_DEBUG_REG_17		0x02010080
+#define IO_APP_DEBUG_REG_18		0x02010088
+
+/* Read/write from/to registers: argument of the GENWQE_*_REG{64,32,16} ioctls */
+struct genwqe_reg_io {
+	__u64 num;		/* register offset/address */
+	__u64 val64;		/* presumably the value read/to write - TODO confirm */
+};
+
+/*
+ * All registers of our card will return values not equal to this value.
+ * If we see IO_ILLEGAL_VALUE on any of our MMIO register reads, the
+ * card can be considered as unusable. It will need recovery.
+ */
+#define IO_ILLEGAL_VALUE		0xffffffffffffffffull
+
+/*
+ * Generic DDCB execution interface.
+ *
+ * This interface is a first prototype resulting from discussions we
+ * had with other teams which wanted to use the Genwqe card. It allows
+ * to issue a DDCB request in a generic way. The request will block
+ * until it finishes or time out with error.
+ *
+ * Some DDCBs require DMA addresses to be specified in the ASIV
+ * block. The interface provides the capability to let the kernel
+ * driver know where those addresses are by specifying the ATS field,
+ * such that it can replace the user-space addresses with appropriate
+ * DMA addresses or DMA addresses of a scatter gather list which is
+ * dynamically created.
+ *
+ * Our hardware will refuse DDCB execution if the ATS field is not as
+ * expected. That means the DDCB execution engine in the chip knows
+ * where it expects DMA addresses within the ASIV part of the DDCB and
+ * will check that against the ATS field definition. Any invalid or
+ * unknown ATS content will lead to DDCB refusal.
+ */
+
+/* Genwqe chip Units */
+#define DDCB_ACFUNC_SLU			0x00  /* chip service layer unit */
+#define DDCB_ACFUNC_APP			0x01  /* chip application */
+
+/* DDCB return codes (RETC) */
+#define DDCB_RETC_IDLE			0x0000 /* Unexecuted/DDCB created */
+#define DDCB_RETC_PENDING		0x0101 /* Pending Execution */
+#define DDCB_RETC_COMPLETE		0x0102 /* Cmd complete. No error */
+#define DDCB_RETC_FAULT			0x0104 /* App Err, recoverable */
+#define DDCB_RETC_ERROR			0x0108 /* App Err, non-recoverable */
+#define DDCB_RETC_FORCED_ERROR		0x01ff /* overwritten by driver  */
+
+#define DDCB_RETC_UNEXEC		0x0110 /* Unexe/Removed from queue */
+#define DDCB_RETC_TERM			0x0120 /* Terminated */
+#define DDCB_RETC_RES0			0x0140 /* Reserved */
+#define DDCB_RETC_RES1			0x0180 /* Reserved */
+
+/* DDCB Command Options (CMDOPT) */
+#define DDCB_OPT_ECHO_FORCE_NO		0x0000 /* ECHO DDCB */
+#define DDCB_OPT_ECHO_FORCE_102		0x0001 /* force return code */
+#define DDCB_OPT_ECHO_FORCE_104		0x0002
+#define DDCB_OPT_ECHO_FORCE_108		0x0003
+
+#define DDCB_OPT_ECHO_FORCE_110		0x0004 /* only on PF ! */
+#define DDCB_OPT_ECHO_FORCE_120		0x0005
+#define DDCB_OPT_ECHO_FORCE_140		0x0006
+#define DDCB_OPT_ECHO_FORCE_180		0x0007
+
+#define DDCB_OPT_ECHO_COPY_NONE		(0 << 5)
+#define DDCB_OPT_ECHO_COPY_ALL		(1 << 5)
+
+/* Definitions of Service Layer Commands */
+#define SLCMD_ECHO_SYNC			0x00 /* PF/VF */
+#define SLCMD_MOVE_FLASH		0x06 /* PF only */
+#define SLCMD_MOVE_FLASH_FLAGS_MODE	0x03 /* bit 0 and 1 used for mode */
+#define SLCMD_MOVE_FLASH_FLAGS_DLOAD	0	/* mode: download  */
+#define SLCMD_MOVE_FLASH_FLAGS_EMUL	1	/* mode: emulation */
+#define SLCMD_MOVE_FLASH_FLAGS_UPLOAD	2	/* mode: upload	   */
+#define SLCMD_MOVE_FLASH_FLAGS_VERIFY	3	/* mode: verify	   */
+#define SLCMD_MOVE_FLASH_FLAG_NOTAP	(1 << 2)/* just dump DDCB and exit */
+#define SLCMD_MOVE_FLASH_FLAG_POLL	(1 << 3)/* wait for RETC >= 0102   */
+#define SLCMD_MOVE_FLASH_FLAG_PARTITION	(1 << 4)
+#define SLCMD_MOVE_FLASH_FLAG_ERASE	(1 << 5)
+
+enum genwqe_card_state {	/* type argument of GENWQE_GET_CARD_STATE */
+	GENWQE_CARD_UNUSED = 0,
+	GENWQE_CARD_USED = 1,
+	GENWQE_CARD_FATAL_ERROR = 2,
+	GENWQE_CARD_STATE_MAX,		/* count of the states above; keep last */
+};
+
+/* common struct for chip image exchange (GENWQE_SLU_UPDATE/GENWQE_SLU_READ) */
+struct genwqe_bitstream {
+	__u64 data_addr;		/* pointer to image data */
+	__u32 size;			/* size of image file */
+	__u32 crc;			/* crc of this image */
+	__u64 target_addr;		/* starting address in Flash */
+	__u32 partition;		/* '0', '1', or 'v' */
+	__u32 uid;			/* 1=host/x=dram */
+
+	__u64 slu_id;			/* informational/sim: SluID */
+	__u64 app_id;			/* informational/sim: AppID */
+
+	__u16 retc;			/* returned from processing */
+	__u16 attn;			/* attention code from processing */
+	__u32 progress;			/* progress code from processing */
+};
+
+/* Issuing a specific DDCB command */
+#define DDCB_LENGTH			256 /* for debug data */
+#define DDCB_ASIV_LENGTH		104 /* len of the DDCB ASIV array */
+#define DDCB_ASIV_LENGTH_ATS		96  /* ASIV in ATS architecture */
+#define DDCB_ASV_LENGTH			64  /* len of the DDCB ASV array  */
+#define DDCB_FIXUPS			12  /* maximum number of fixups */
+
+struct genwqe_debug_data {	/* presumably the target of ddata_addr - TODO confirm */
+	char driver_version[64];
+	__u64 slu_unitcfg;	/* presumably IO_SLU_UNITCFG contents - TODO confirm */
+	__u64 app_unitcfg;	/* presumably IO_APP_UNITCFG contents - TODO confirm */
+
+	__u8  ddcb_before[DDCB_LENGTH];	/* DDCB_LENGTH-byte DDCB images; the */
+	__u8  ddcb_prev[DDCB_LENGTH];	/* capture points are implied by the */
+	__u8  ddcb_finished[DDCB_LENGTH]; /* names only - TODO confirm */
+};
+
+/*
+ * Address Translation Specification (ATS) definitions
+ *
+ * Each 4 bits within the ATS 64-bit word specify the required address
+ * translation at the defined offset.
+ *
+ * 63 LSB
+ *         6666.5555.5555.5544.4444.4443.3333.3333 ... 11
+ *         3210.9876.5432.1098.7654.3210.9876.5432 ... 1098.7654.3210
+ *
+ * offset: 0x00 0x08 0x10 0x18 0x20 0x28 0x30 0x38 ... 0x68 0x70 0x78
+ *         res  res  res  res  ASIV ...
+ * The first 4 entries in the ATS word are reserved. The following nibbles
+ * each describe at an 8 byte offset the format of the required data.
+ */
+#define ATS_TYPE_DATA			0x0ull /* data  */
+#define ATS_TYPE_FLAT_RD		0x4ull /* flat buffer read only */
+#define ATS_TYPE_FLAT_RDWR		0x5ull /* flat buffer read/write */
+#define ATS_TYPE_SGL_RD			0x6ull /* sgl read only */
+#define ATS_TYPE_SGL_RDWR		0x7ull /* sgl read/write */
+
+#define ATS_SET_FLAGS(_struct, _field, _flags)				\
+	(((_flags) & 0xf) << (44 - (4 * (offsetof(_struct, _field) / 8))))
+
+#define ATS_GET_FLAGS(_ats, _byte_offs)					\
+	(((_ats)	  >> (44 - (4 * ((_byte_offs) / 8)))) & 0xf)
+
+/**
+ * struct genwqe_ddcb_cmd - User parameter for generic DDCB commands
+ *
+ * On the way into the kernel the driver will read the whole data
+ * structure. On the way out the driver will not copy the ASIV data
+ * back to user-space.
+ */
+struct genwqe_ddcb_cmd {
+	/* START of data copied to/from driver */
+	__u64 next_addr;		/* chaining genwqe_ddcb_cmd */
+	__u64 flags;			/* reserved */
+
+	__u8  acfunc;			/* accelerators functional unit */
+	__u8  cmd;			/* command to execute */
+	__u8  asiv_length;		/* used parameter length */
+	__u8  asv_length;		/* length of valid return values  */
+	__u16 cmdopts;			/* command options (CMDOPT) */
+	__u16 retc;			/* return code (RETC) from processing */
+
+	__u16 attn;			/* attention code from processing */
+	__u16 vcrc;			/* variant crc16 */
+	__u32 progress;			/* progress code from processing  */
+
+	__u64 deque_ts;			/* dequeue time stamp */
+	__u64 cmplt_ts;			/* completion time stamp */
+	__u64 disp_ts;			/* SW processing start */
+
+	/* move to end and avoid copy-back */
+	__u64 ddata_addr;		/* collect debug data */
+
+	/* command specific values */
+	__u8  asv[DDCB_ASV_LENGTH];
+
+	/* END of data copied from driver */
+	union {				/* both views span DDCB_ASIV_LENGTH bytes */
+		struct {
+			__u64 ats;	/* Address Translation Specification word */
+			__u8  asiv[DDCB_ASIV_LENGTH_ATS];
+		};
+		/* used for flash update to keep it backward compatible */
+		__u8 __asiv[DDCB_ASIV_LENGTH];
+	};
+	/* END of data copied to driver */
+};
+
+#define GENWQE_IOC_CODE	    0xa5
+
+/* Access functions */
+#define GENWQE_READ_REG64   _IOR(GENWQE_IOC_CODE, 30, struct genwqe_reg_io)
+#define GENWQE_WRITE_REG64  _IOW(GENWQE_IOC_CODE, 31, struct genwqe_reg_io)
+#define GENWQE_READ_REG32   _IOR(GENWQE_IOC_CODE, 32, struct genwqe_reg_io)
+#define GENWQE_WRITE_REG32  _IOW(GENWQE_IOC_CODE, 33, struct genwqe_reg_io)
+#define GENWQE_READ_REG16   _IOR(GENWQE_IOC_CODE, 34, struct genwqe_reg_io)
+#define GENWQE_WRITE_REG16  _IOW(GENWQE_IOC_CODE, 35, struct genwqe_reg_io)
+
+#define GENWQE_GET_CARD_STATE _IOR(GENWQE_IOC_CODE, 36,	enum genwqe_card_state)
+
+/**
+ * struct genwqe_mem - Memory pinning/unpinning information
+ * @addr:          virtual user space address
+ * @size:          size of the area pin/dma-map/unmap
+ * @direction:     0: read/1: read and write
+ *
+ * Avoid pinning and unpinning of memory pages dynamically. Instead
+ * the idea is to pin the whole buffer space required for DDCB
+ * operations in advance. The driver will reuse this pinning and the
+ * memory associated with it to setup the sglists for the DDCB
+ * requests without the need to allocate and free memory or map and
+ * unmap to get the DMA addresses.
+ *
+ * The inverse operation needs to be called after the pinning is not
+ * needed anymore. Otherwise the pinnings will get removed
+ * after the device is closed. Note that pinnings will require
+ * memory.
+ */
+struct genwqe_mem {		/* argument of GENWQE_PIN_MEM/GENWQE_UNPIN_MEM */
+	__u64 addr;
+	__u64 size;
+	__u64 direction;
+	__u64 flags;		/* meaning not visible in this header - TODO confirm */
+};
+
+#define GENWQE_PIN_MEM	      _IOWR(GENWQE_IOC_CODE, 40, struct genwqe_mem)
+#define GENWQE_UNPIN_MEM      _IOWR(GENWQE_IOC_CODE, 41, struct genwqe_mem)
+
+/*
+ * Generic synchronous DDCB execution interface.
+ * Synchronously execute a DDCB.
+ *
+ * Return: 0 on success or negative error code.
+ *         -EINVAL: Invalid parameters (ASIV_LEN, ASV_LEN, illegal fixups,
+ *                  no mappings found/could not create mappings)
+ *         -EFAULT: illegal addresses in fixups, purging failed
+ *         -EBADMSG: enqueuing failed, retc != DDCB_RETC_COMPLETE
+ */
+#define GENWQE_EXECUTE_DDCB					\
+	_IOWR(GENWQE_IOC_CODE, 50, struct genwqe_ddcb_cmd)
+
+#define GENWQE_EXECUTE_RAW_DDCB					\
+	_IOWR(GENWQE_IOC_CODE, 51, struct genwqe_ddcb_cmd)
+
+/* Service Layer functions (PF only) */
+#define GENWQE_SLU_UPDATE  _IOWR(GENWQE_IOC_CODE, 80, struct genwqe_bitstream)
+#define GENWQE_SLU_READ	   _IOWR(GENWQE_IOC_CODE, 81, struct genwqe_bitstream)
+
+#endif	/* __GENWQE_CARD_H__ */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8b729c278b64..bc1dcabe9217 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -890,6 +890,16 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		struct cgroup *cgrp = dentry->d_fsdata;
 
 		BUG_ON(!(cgroup_is_dead(cgrp)));
+
+		/*
+		 * XXX: cgrp->id is only used to look up css's.  As cgroup
+		 * and css's lifetimes will be decoupled, it should be made
+		 * per-subsystem and moved to css->id so that lookups are
+		 * successful until the target css is released.
+		 */
+		idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+		cgrp->id = -1;
+
 		call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
 	} else {
 		struct cfent *cfe = __d_cfe(dentry);
@@ -4268,6 +4278,7 @@ static void css_release(struct percpu_ref *ref)
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
+	rcu_assign_pointer(css->cgroup->subsys[css->ss->subsys_id], NULL);
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 }
 
@@ -4426,14 +4437,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
 	root->number_of_cgroups++;
 
-	/* each css holds a ref to the cgroup's dentry and the parent css */
-	for_each_root_subsys(root, ss) {
-		struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
-		dget(dentry);
-		css_get(css->parent);
-	}
-
 	/* hold a ref to the parent's dentry */
 	dget(parent->dentry);
 
@@ -4445,6 +4448,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 		if (err)
 			goto err_destroy;
 
+		/* each css holds a ref to the cgroup's dentry and parent css */
+		dget(dentry);
+		css_get(css->parent);
+
+		/* mark it consumed for error path */
+		css_ar[ss->subsys_id] = NULL;
+
 		if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
 		    parent->parent) {
 			pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
@@ -4491,6 +4501,14 @@ err_free_cgrp:
 	return err;
 
 err_destroy:
+	for_each_root_subsys(root, ss) {
+		struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
+
+		if (css) {
+			percpu_ref_cancel_init(&css->refcnt);
+			ss->css_free(css);
+		}
+	}
 	cgroup_destroy_locked(cgrp);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -4652,8 +4670,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 * will be invoked to perform the rest of destruction once the
 	 * percpu refs of all css's are confirmed to be killed.
 	 */
-	for_each_root_subsys(cgrp->root, ss)
-		kill_css(cgroup_css(cgrp, ss));
+	for_each_root_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
+		if (css)
+			kill_css(css);
+	}
 
 	/*
 	 * Mark @cgrp dead.  This prevents further task migration and child
@@ -4722,14 +4744,6 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp)
 	/* delete this cgroup from parent->children */
 	list_del_rcu(&cgrp->sibling);
 
-	/*
-	 * We should remove the cgroup object from idr before its grace
-	 * period starts, so we won't be looking up a cgroup while the
-	 * cgroup is being freed.
-	 */
-	idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
-	cgrp->id = -1;
-
 	dput(d);
 
 	set_bit(CGRP_RELEASABLE, &parent->flags);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b462fa197517..aa6a8aadb911 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -19,6 +19,12 @@ EXPORT_SYMBOL(system_freezing_cnt);
 bool pm_freezing;
 bool pm_nosig_freezing;
 
+/*
+ * Temporary export for the deadlock workaround in ata_scsi_hotplug().
+ * Remove once the hack becomes unnecessary.
+ */
+EXPORT_SYMBOL_GPL(pm_freezing);
+
 /* protects freezing and frozen transitions */
 static DEFINE_SPINLOCK(freezer_lock);
 
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 463aa6736751..eacb8bd8cab4 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -81,6 +81,7 @@ void pm_vt_switch_unregister(struct device *dev)
 	list_for_each_entry(tmp, &pm_vt_switch_list, head) {
 		if (tmp->dev == dev) {
 			list_del(&tmp->head);
+			kfree(tmp);
 			break;
 		}
 	}
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 32b10f53d0b4..2dcb37736d84 100644
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -82,7 +82,9 @@ kallsyms()
 		kallsymopt="${kallsymopt} --all-symbols"
 	fi
 
-	kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET"
+	if [ -n "${CONFIG_ARM}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then
+		kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET"
+	fi
 
 	local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL}               \
 		      ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}"
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 419491d8e7d2..6625699f497c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4334,8 +4334,10 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		}
 		err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER,
 				   PEER__RECV, &ad);
-		if (err)
+		if (err) {
 			selinux_netlbl_err(skb, err, 0);
+			return err;
+		}
 	}
 
 	if (secmark_active) {
@@ -5586,11 +5588,11 @@ static int selinux_setprocattr(struct task_struct *p,
 		/* Check for ptracing, and update the task SID if ok.
 		   Otherwise, leave SID unchanged and fail. */
 		ptsid = 0;
-		task_lock(p);
+		rcu_read_lock();
 		tracer = ptrace_parent(p);
 		if (tracer)
 			ptsid = task_sid(tracer);
-		task_unlock(p);
+		rcu_read_unlock();
 
 		if (tracer) {
 			error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index b8d6d541d854..4088b816a3ee 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -26,7 +26,6 @@
 #include <sys/socket.h>
 #include <sys/poll.h>
 #include <sys/utsname.h>
-#include <linux/types.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 8bcb04096eb2..520de3304571 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -22,7 +22,6 @@
 #include <sys/socket.h>
 #include <sys/poll.h>
 #include <sys/ioctl.h>
-#include <linux/types.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <mntent.h>