summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/stable/sysfs-bus-xen-backend9
-rw-r--r--Documentation/ABI/testing/sysfs-class-net18
-rw-r--r--Documentation/ABI/testing/sysfs-driver-xen-blkback10
-rw-r--r--Documentation/arm64/sve.txt4
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt14
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas-wdt.txt5
-rw-r--r--Documentation/hwmon/ina2xx2
-rw-r--r--Documentation/i2c/DMA-considerations10
-rw-r--r--MAINTAINERS8
-rw-r--r--Makefile5
-rw-r--r--[-rwxr-xr-x]arch/arm/boot/dts/am335x-osd3358-sm-red.dts0
-rw-r--r--arch/arm/boot/dts/am4372.dtsi1
-rw-r--r--arch/arm/boot/dts/imx23-evk.dts90
-rw-r--r--arch/arm/boot/dts/imx28-evk.dts183
-rw-r--r--arch/arm/boot/dts/imx7d.dtsi12
-rw-r--r--arch/arm/boot/dts/omap4-droid4-xt894.dts20
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig1
-rw-r--r--arch/arm/configs/mxs_defconfig1
-rw-r--r--arch/arm/configs/versatile_defconfig14
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c39
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/configs/defconfig3
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c29
-rw-r--r--arch/arm64/crypto/sm4-ce-glue.c2
-rw-r--r--arch/m68k/mac/misc.c10
-rw-r--r--arch/nios2/Kconfig.debug9
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/riscv/include/asm/tlb.h4
-rw-r--r--arch/riscv/kernel/sys_riscv.c15
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Makefile23
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S66
-rw-r--r--arch/x86/events/core.c2
-rw-r--r--arch/x86/include/asm/irqflags.h3
-rw-r--r--arch/x86/include/asm/pgtable-3level.h7
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/signal.h7
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h40
-rw-r--r--arch/x86/include/asm/vgtod.h2
-rw-r--r--arch/x86/kernel/alternative.c9
-rw-r--r--arch/x86/kernel/cpu/bugs.c46
-rw-r--r--arch/x86/kernel/cpu/common.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/dumpstack.c20
-rw-r--r--arch/x86/lib/usercopy.c5
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/pageattr.c25
-rw-r--r--arch/x86/mm/pti.c2
-rw-r--r--arch/x86/mm/tlb.c7
-rw-r--r--arch/x86/platform/efi/efi_32.c8
-rw-r--r--arch/x86/xen/mmu_pv.c9
-rw-r--r--block/blk-wbt.c89
-rw-r--r--block/bsg.c8
-rw-r--r--block/elevator.c3
-rw-r--r--drivers/ata/pata_ftide010.c27
-rw-r--r--drivers/base/power/clock_ops.c2
-rw-r--r--drivers/block/xen-blkback/blkback.c99
-rw-r--r--drivers/block/xen-blkback/common.h14
-rw-r--r--drivers/block/xen-blkfront.c110
-rw-r--r--drivers/bluetooth/Kconfig1
-rw-r--r--drivers/bluetooth/btmtkuart.c8
-rw-r--r--drivers/bus/ti-sysc.c37
-rw-r--r--drivers/cdrom/cdrom.c2
-rw-r--r--drivers/clk/clk-npcm7xx.c4
-rw-r--r--drivers/clk/x86/clk-st.c2
-rw-r--r--drivers/cpuidle/governors/menu.c13
-rw-r--r--drivers/crypto/caam/caamalg_qi.c6
-rw-r--r--drivers/crypto/caam/caampkc.c20
-rw-r--r--drivers/crypto/caam/jr.c3
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_dev.h3
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_lib.c1
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_reqmgr.c57
-rw-r--r--drivers/crypto/chelsio/chtls/chtls.h5
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_main.c7
-rw-r--r--drivers/crypto/vmx/aes_cbc.c30
-rw-r--r--drivers/crypto/vmx/aes_xts.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_dpm.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.c3
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c6
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c4
-rw-r--r--drivers/gpu/drm/i915/intel_audio.c3
-rw-r--r--drivers/gpu/drm/i915/intel_display.c7
-rw-r--r--drivers/gpu/drm/i915/intel_hdmi.c8
-rw-r--r--drivers/gpu/drm/i915/intel_lspcon.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl.c11
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_rdma.c92
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.c47
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.h3
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp.c18
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h9
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.c27
-rw-r--r--drivers/hwmon/adt7475.c25
-rw-r--r--drivers/hwmon/ina2xx.c13
-rw-r--r--drivers/hwmon/nct6775.c2
-rw-r--r--drivers/i2c/algos/i2c-algo-bit.c55
-rw-r--r--drivers/i2c/busses/i2c-designware-master.c1
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c7
-rw-r--r--drivers/i2c/busses/i2c-i801.c9
-rw-r--r--drivers/i2c/busses/i2c-sh_mobile.c15
-rw-r--r--drivers/i2c/i2c-core-base.c11
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/core/addr.c406
-rw-r--r--drivers/infiniband/core/cache.c141
-rw-r--r--drivers/infiniband/core/cm.c9
-rw-r--r--drivers/infiniband/core/cma.c263
-rw-r--r--drivers/infiniband/core/cma_configfs.c2
-rw-r--r--drivers/infiniband/core/core_priv.h12
-rw-r--r--drivers/infiniband/core/cq.c10
-rw-r--r--drivers/infiniband/core/device.c170
-rw-r--r--drivers/infiniband/core/fmr_pool.c5
-rw-r--r--drivers/infiniband/core/iwcm.c2
-rw-r--r--drivers/infiniband/core/mad.c80
-rw-r--r--drivers/infiniband/core/mad_priv.h2
-rw-r--r--drivers/infiniband/core/netlink.c4
-rw-r--r--drivers/infiniband/core/nldev.c37
-rw-r--r--drivers/infiniband/core/rdma_core.c58
-rw-r--r--drivers/infiniband/core/rdma_core.h1
-rw-r--r--drivers/infiniband/core/restrack.c30
-rw-r--r--drivers/infiniband/core/sa.h8
-rw-r--r--drivers/infiniband/core/sa_query.c70
-rw-r--r--drivers/infiniband/core/security.c7
-rw-r--r--drivers/infiniband/core/sysfs.c95
-rw-r--r--drivers/infiniband/core/ucma.c8
-rw-r--r--drivers/infiniband/core/umem.c125
-rw-r--r--drivers/infiniband/core/umem_odp.c621
-rw-r--r--drivers/infiniband/core/user_mad.c13
-rw-r--r--drivers/infiniband/core/uverbs.h15
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c111
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c140
-rw-r--r--drivers/infiniband/core/uverbs_main.c346
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c7
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c13
-rw-r--r--drivers/infiniband/core/verbs.c19
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c6
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c216
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c136
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c88
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h4
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c29
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c77
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h10
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h5
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c55
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c50
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c16
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile42
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c4
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c492
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h71
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h4
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c4
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h48
-rw-r--r--drivers/infiniband/hw/hfi1/init.c113
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.c94
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h192
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c4
-rw-r--r--drivers/infiniband/hw/hfi1/msix.c363
-rw-r--r--drivers/infiniband/hw/hfi1/msix.h64
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c74
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c59
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h2
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c100
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h31
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c24
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c382
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c56
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h21
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c69
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h3
-rw-r--r--drivers/infiniband/hw/hfi1/trace_iowait.h54
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c14
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c22
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c139
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h20
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c259
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h35
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h11
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c12
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c21
-rw-r--r--drivers/infiniband/hw/hns/Kconfig1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h45
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c629
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h96
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c123
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c212
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c41
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c73
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig1
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c20
-rw-r--r--drivers/infiniband/hw/mlx4/main.c190
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h5
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c8
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c6
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c129
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h14
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c1
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c363
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c393
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c3
-rw-r--r--drivers/infiniband/hw/mlx5/main.c510
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c9
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h96
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c26
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c123
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c302
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c44
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c4
-rw-r--r--drivers/infiniband/hw/nes/nes.c3
-rw-r--r--drivers/infiniband/hw/nes/nes.h9
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c63
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c74
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c3
-rw-r--r--drivers/infiniband/hw/qedr/main.c73
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h2
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c4
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c5
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c17
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c18
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c342
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c101
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c17
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c47
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h15
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c39
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c74
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c16
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c91
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c46
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c677
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_tx.h42
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c15
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c39
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c49
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c55
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c10
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c36
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c18
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c9
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c2
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c3
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c3
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c16
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c28
-rw-r--r--drivers/mmc/core/queue.c12
-rw-r--r--drivers/mmc/core/queue.h1
-rw-r--r--drivers/mmc/host/android-goldfish.c4
-rw-r--r--drivers/mmc/host/atmel-mci.c12
-rw-r--r--drivers/mmc/host/renesas_sdhi_internal_dmac.c10
-rw-r--r--drivers/mtd/nand/raw/denali.c5
-rw-r--r--drivers/mtd/nand/raw/docg4.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c6
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c36
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c10
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_msg.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.h6
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c108
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h6
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_ethtool.c7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c30
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.c29
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c52
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c115
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h16
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c7
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_main.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c36
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c31
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c20
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c6
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_init_ops.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c187
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h27
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_reg_addr.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_filter.c6
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge_main.c23
-rw-r--r--drivers/net/ethernet/renesas/ravb.h5
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c5
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c13
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.h13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c5
-rw-r--r--drivers/net/hyperv/netvsc_drv.c5
-rw-r--r--drivers/net/usb/r8152.c4
-rw-r--r--drivers/nvme/host/pci.c8
-rw-r--r--drivers/nvme/target/core.c4
-rw-r--r--drivers/nvme/target/fcloop.c3
-rw-r--r--drivers/of/base.c47
-rw-r--r--drivers/thermal/of-thermal.c7
-rw-r--r--drivers/thermal/qoriq_thermal.c27
-rw-r--r--drivers/thermal/rcar_gen3_thermal.c11
-rw-r--r--drivers/thermal/rcar_thermal.c16
-rw-r--r--drivers/vhost/vhost.c2
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c9
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/isofs/inode.c7
-rw-r--r--fs/notify/mark.c6
-rw-r--r--fs/quota/quota.c14
-rw-r--r--fs/udf/super.c93
-rw-r--r--include/linux/arm-smccc.h38
-rw-r--r--include/linux/i2c.h2
-rw-r--r--include/linux/of.h33
-rw-r--r--include/linux/platform_data/ina2xx.h2
-rw-r--r--include/linux/qed/qed_rdma_if.h11
-rw-r--r--include/linux/quota.h8
-rw-r--r--include/net/act_api.h7
-rw-r--r--include/net/pkt_cls.h25
-rw-r--r--include/rdma/ib_addr.h11
-rw-r--r--include/rdma/ib_cm.h2
-rw-r--r--include/rdma/ib_sa.h38
-rw-r--r--include/rdma/ib_umem.h9
-rw-r--r--include/rdma/ib_umem_odp.h75
-rw-r--r--include/rdma/ib_verbs.h150
-rw-r--r--include/rdma/rdma_cm.h11
-rw-r--r--include/rdma/rdma_netlink.h4
-rw-r--r--include/rdma/rdma_vt.h51
-rw-r--r--include/rdma/rdmavt_qp.h7
-rw-r--r--include/rdma/restrack.h12
-rw-r--r--include/rdma/uverbs_ioctl.h111
-rw-r--r--include/rdma/uverbs_std_types.h51
-rw-r--r--include/uapi/rdma/ib_user_verbs.h20
-rw-r--r--include/uapi/rdma/mlx5-abi.h15
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h21
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_verbs.h12
-rw-r--r--include/uapi/rdma/rdma_netlink.h3
-rw-r--r--include/uapi/rdma/rdma_user_ioctl_cmds.h7
-rw-r--r--kernel/bpf/hashtab.c23
-rw-r--r--kernel/bpf/sockmap.c11
-rw-r--r--kernel/cpu.c26
-rw-r--r--kernel/printk/printk.c1
-rw-r--r--kernel/watchdog.c4
-rw-r--r--kernel/watchdog_hld.c2
-rw-r--r--kernel/workqueue.c2
-rw-r--r--lib/percpu_counter.c1
-rw-r--r--lib/rhashtable.c1
-rw-r--r--mm/page-writeback.c1
-rw-r--r--mm/page_alloc.c1
-rw-r--r--mm/slub.c1
-rw-r--r--net/core/dev.c1
-rw-r--r--net/dsa/slave.c4
-rw-r--r--net/ipv4/tcp_bbr.c42
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ncsi/ncsi-netlink.c4
-rw-r--r--net/rds/tcp.c1
-rw-r--r--net/sched/act_api.c70
-rw-r--r--net/sched/act_bpf.c8
-rw-r--r--net/sched/act_connmark.c8
-rw-r--r--net/sched/act_csum.c8
-rw-r--r--net/sched/act_gact.c8
-rw-r--r--net/sched/act_ife.c92
-rw-r--r--net/sched/act_ipt.c16
-rw-r--r--net/sched/act_mirred.c8
-rw-r--r--net/sched/act_nat.c8
-rw-r--r--net/sched/act_pedit.c8
-rw-r--r--net/sched/act_police.c8
-rw-r--r--net/sched/act_sample.c8
-rw-r--r--net/sched/act_simple.c8
-rw-r--r--net/sched/act_skbedit.c8
-rw-r--r--net/sched/act_skbmod.c8
-rw-r--r--net/sched/act_tunnel_key.c8
-rw-r--r--net/sched/act_vlan.c8
-rw-r--r--net/sched/cls_u32.c10
-rw-r--r--net/sched/sch_cake.c24
-rw-r--r--net/tls/tls_main.c9
-rw-r--r--net/xdp/xdp_umem.c4
-rw-r--r--scripts/Kbuild.include4
-rw-r--r--scripts/Makefile.build2
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c5
445 files changed, 9824 insertions, 6879 deletions
diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
index 3d5951c8bf5f..e8b60bd766f7 100644
--- a/Documentation/ABI/stable/sysfs-bus-xen-backend
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -73,3 +73,12 @@ KernelVersion: 3.0
Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Description:
Number of sectors written by the frontend.
+
+What: /sys/bus/xen-backend/devices/*/state
+Date: August 2018
+KernelVersion: 4.19
+Contact: Joe Jin <joe.jin@oracle.com>
+Description:
+ The state of the device. One of: 'Unknown',
+ 'Initialising', 'Initialised', 'Connected', 'Closing',
+ 'Closed', 'Reconfiguring', 'Reconfigured'.
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 2f1788111cd9..ec2232f6a949 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -91,6 +91,24 @@ Description:
stacked (e.g: VLAN interfaces) but still have the same MAC
address as their parent device.
+What: /sys/class/net/<iface>/dev_port
+Date: February 2014
+KernelVersion: 3.15
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the port number of this network device, formatted
+ as a decimal value. Some NICs have multiple independent ports
+ on the same PCI bus, device and function. This attribute allows
+ userspace to distinguish the respective interfaces.
+
+ Note: some device drivers started to use 'dev_id' for this
+ purpose since long before 3.15 and have not adopted the new
+ attribute ever since. To query the port number, some tools look
+ exclusively at 'dev_port', while others only consult 'dev_id'.
+ If a network device has multiple client adapter ports as
+ described in the previous paragraph and does not set this
+ attribute to its port number, it's a kernel bug.
+
What: /sys/class/net/<iface>/dormant
Date: March 2006
KernelVersion: 2.6.17
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
index 8bb43b66eb55..4e7babb3ba1f 100644
--- a/Documentation/ABI/testing/sysfs-driver-xen-blkback
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
@@ -15,3 +15,13 @@ Description:
blkback. If the frontend tries to use more than
max_persistent_grants, the LRU kicks in and starts
removing 5% of max_persistent_grants every 100ms.
+
+What: /sys/module/xen_blkback/parameters/persistent_grant_unused_seconds
+Date: August 2018
+KernelVersion: 4.19
+Contact: Roger Pau Monné <roger.pau@citrix.com>
+Description:
+ How long a persistent grant is allowed to remain
+ allocated without being in use. The time is in
+ seconds, 0 means indefinitely long.
+ The default is 60 seconds.
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
index f128f736b4a5..7169a0ec41d8 100644
--- a/Documentation/arm64/sve.txt
+++ b/Documentation/arm64/sve.txt
@@ -200,7 +200,7 @@ prctl(PR_SVE_SET_VL, unsigned long arg)
thread.
* Changing the vector length causes all of P0..P15, FFR and all bits of
- Z0..V31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+ Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
unspecified. Calling PR_SVE_SET_VL with vl equal to the thread's current
vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
flag, does not constitute a change to the vector length for this purpose.
@@ -500,7 +500,7 @@ References
[2] arch/arm64/include/uapi/asm/ptrace.h
AArch64 Linux ptrace ABI definitions
-[3] linux/Documentation/arm64/cpu-feature-registers.txt
+[3] Documentation/arm64/cpu-feature-registers.txt
[4] ARM IHI0055C
http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
diff --git a/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt
index b0a8af51c388..265b223cd978 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt
@@ -11,7 +11,7 @@ The RISC-V supervisor ISA manual specifies three interrupt sources that are
attached to every HLIC: software interrupts, the timer interrupt, and external
interrupts. Software interrupts are used to send IPIs between cores. The
timer interrupt comes from an architecturally mandated real-time timer that is
-controller via Supervisor Binary Interface (SBI) calls and CSR reads. External
+controlled via Supervisor Binary Interface (SBI) calls and CSR reads. External
interrupts connect all other device interrupts to the HLIC, which are routed
via the platform-level interrupt controller (PLIC).
@@ -25,7 +25,15 @@ in the system.
Required properties:
- compatible : "riscv,cpu-intc"
-- #interrupt-cells : should be <1>
+- #interrupt-cells : should be <1>. The interrupt sources are defined by the
+ RISC-V supervisor ISA manual, with only the following three interrupts being
+ defined for supervisor mode:
+ - Source 1 is the supervisor software interrupt, which can be sent by an SBI
+ call and is reserved for use by software.
+ - Source 5 is the supervisor timer interrupt, which can be configured by
+ SBI calls and implements a one-shot timer.
+ - Source 9 is the supervisor external interrupt, which chains to all other
+ device interrupts.
- interrupt-controller : Identifies the node as an interrupt controller
Furthermore, this interrupt-controller MUST be embedded inside the cpu
@@ -38,7 +46,7 @@ An example device tree entry for a HLIC is show below.
...
cpu1-intc: interrupt-controller {
#interrupt-cells = <1>;
- compatible = "riscv,cpu-intc", "sifive,fu540-c000-cpu-intc";
+ compatible = "sifive,fu540-c000-cpu-intc", "riscv,cpu-intc";
interrupt-controller;
};
};
diff --git a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
index 5d47a262474c..9407212a85a8 100644
--- a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
@@ -7,6 +7,7 @@ Required properties:
Examples with soctypes are:
- "renesas,r8a7743-wdt" (RZ/G1M)
- "renesas,r8a7745-wdt" (RZ/G1E)
+ - "renesas,r8a774a1-wdt" (RZ/G2M)
- "renesas,r8a7790-wdt" (R-Car H2)
- "renesas,r8a7791-wdt" (R-Car M2-W)
- "renesas,r8a7792-wdt" (R-Car V2H)
@@ -21,8 +22,8 @@ Required properties:
- "renesas,r7s72100-wdt" (RZ/A1)
The generic compatible string must be:
- "renesas,rza-wdt" for RZ/A
- - "renesas,rcar-gen2-wdt" for R-Car Gen2 and RZ/G
- - "renesas,rcar-gen3-wdt" for R-Car Gen3
+ - "renesas,rcar-gen2-wdt" for R-Car Gen2 and RZ/G1
+ - "renesas,rcar-gen3-wdt" for R-Car Gen3 and RZ/G2
- reg : Should contain WDT registers location and length
- clocks : the clock feeding the watchdog timer.
diff --git a/Documentation/hwmon/ina2xx b/Documentation/hwmon/ina2xx
index 72d16f08e431..b8df81f6d6bc 100644
--- a/Documentation/hwmon/ina2xx
+++ b/Documentation/hwmon/ina2xx
@@ -32,7 +32,7 @@ Supported chips:
Datasheet: Publicly available at the Texas Instruments website
http://www.ti.com/
-Author: Lothar Felten <l-felten@ti.com>
+Author: Lothar Felten <lothar.felten@gmail.com>
Description
-----------
diff --git a/Documentation/i2c/DMA-considerations b/Documentation/i2c/DMA-considerations
index 966610aa4620..203002054120 100644
--- a/Documentation/i2c/DMA-considerations
+++ b/Documentation/i2c/DMA-considerations
@@ -50,10 +50,14 @@ bounce buffer. But you don't need to care about that detail, just use the
returned buffer. If NULL is returned, the threshold was not met or a bounce
buffer could not be allocated. Fall back to PIO in that case.
-In any case, a buffer obtained from above needs to be released. It ensures data
-is copied back to the message and a potentially used bounce buffer is freed::
+In any case, a buffer obtained from above needs to be released. Another helper
+function ensures a potentially used bounce buffer is freed::
- i2c_release_dma_safe_msg_buf(msg, dma_buf);
+ i2c_put_dma_safe_msg_buf(dma_buf, msg, xferred);
+
+The last argument 'xferred' controls if the buffer is synced back to the
+message or not. No syncing is needed in cases setting up DMA had an error and
+there was no data transferred.
The bounce buffer handling from the core is generic and simple. It will always
allocate a new bounce buffer. If you want a more sophisticated handling (e.g.
diff --git a/MAINTAINERS b/MAINTAINERS
index a5b256b25905..9ad052aeac39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8255,9 +8255,9 @@ F: drivers/ata/pata_arasan_cf.c
LIBATA PATA DRIVERS
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
-M: Jens Axboe <kernel.dk>
+M: Jens Axboe <axboe@kernel.dk>
L: linux-ide@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
S: Maintained
F: drivers/ata/pata_*.c
F: drivers/ata/ata_generic.c
@@ -8275,7 +8275,7 @@ LIBATA SATA AHCI PLATFORM devices support
M: Hans de Goede <hdegoede@redhat.com>
M: Jens Axboe <axboe@kernel.dk>
L: linux-ide@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
S: Maintained
F: drivers/ata/ahci_platform.c
F: drivers/ata/libahci_platform.c
@@ -8291,7 +8291,7 @@ F: drivers/ata/sata_promise.*
LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
M: Jens Axboe <axboe@kernel.dk>
L: linux-ide@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
S: Maintained
F: drivers/ata/
F: include/linux/ata.h
diff --git a/Makefile b/Makefile
index 2b458801ba74..19948e556941 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 19
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Merciless Moray
# *DOCUMENTATION*
@@ -807,6 +807,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
# disable pointer signed / unsigned warnings in gcc 4.0
KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
+# disable stringop warnings in gcc 8+
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
diff --git a/arch/arm/boot/dts/am335x-osd3358-sm-red.dts b/arch/arm/boot/dts/am335x-osd3358-sm-red.dts
index 4d969013f99a..4d969013f99a 100755..100644
--- a/arch/arm/boot/dts/am335x-osd3358-sm-red.dts
+++ b/arch/arm/boot/dts/am335x-osd3358-sm-red.dts
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index f0cbd86312dc..d4b7c59eec68 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -469,6 +469,7 @@
ti,hwmods = "rtc";
clocks = <&clk_32768_ck>;
clock-names = "int-clk";
+ system-power-controller;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts
index 9fb47724b9c1..ad2ae25b7b4d 100644
--- a/arch/arm/boot/dts/imx23-evk.dts
+++ b/arch/arm/boot/dts/imx23-evk.dts
@@ -13,6 +13,43 @@
reg = <0x40000000 0x08000000>;
};
+ reg_vddio_sd0: regulator-vddio-sd0 {
+ compatible = "regulator-fixed";
+ regulator-name = "vddio-sd0";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio1 29 0>;
+ };
+
+ reg_lcd_3v3: regulator-lcd-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "lcd-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio1 18 0>;
+ enable-active-high;
+ };
+
+ reg_lcd_5v: regulator-lcd-5v {
+ compatible = "regulator-fixed";
+ regulator-name = "lcd-5v";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+
+ panel {
+ compatible = "sii,43wvf1g";
+ backlight = <&backlight_display>;
+ dvdd-supply = <&reg_lcd_3v3>;
+ avdd-supply = <&reg_lcd_5v>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&display_out>;
+ };
+ };
+ };
+
apb@80000000 {
apbh@80000000 {
gpmi-nand@8000c000 {
@@ -52,31 +89,11 @@
lcdif@80030000 {
pinctrl-names = "default";
pinctrl-0 = <&lcdif_24bit_pins_a>;
- lcd-supply = <&reg_lcd_3v3>;
- display = <&display0>;
status = "okay";
- display0: display0 {
- bits-per-pixel = <32>;
- bus-width = <24>;
-
- display-timings {
- native-mode = <&timing0>;
- timing0: timing0 {
- clock-frequency = <9200000>;
- hactive = <480>;
- vactive = <272>;
- hback-porch = <15>;
- hfront-porch = <8>;
- vback-porch = <12>;
- vfront-porch = <4>;
- hsync-len = <1>;
- vsync-len = <1>;
- hsync-active = <0>;
- vsync-active = <0>;
- de-active = <1>;
- pixelclk-active = <0>;
- };
+ port {
+ display_out: endpoint {
+ remote-endpoint = <&panel_in>;
};
};
};
@@ -118,32 +135,7 @@
};
};
- regulators {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- reg_vddio_sd0: regulator@0 {
- compatible = "regulator-fixed";
- reg = <0>;
- regulator-name = "vddio-sd0";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- gpio = <&gpio1 29 0>;
- };
-
- reg_lcd_3v3: regulator@1 {
- compatible = "regulator-fixed";
- reg = <1>;
- regulator-name = "lcd-3v3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- gpio = <&gpio1 18 0>;
- enable-active-high;
- };
- };
-
- backlight {
+ backlight_display: backlight {
compatible = "pwm-backlight";
pwms = <&pwm 2 5000000>;
brightness-levels = <0 4 8 16 32 64 128 255>;
diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts
index 6b0ae667640f..93ab5bdfe068 100644
--- a/arch/arm/boot/dts/imx28-evk.dts
+++ b/arch/arm/boot/dts/imx28-evk.dts
@@ -13,6 +13,87 @@
reg = <0x40000000 0x08000000>;
};
+
+ reg_3p3v: regulator-3p3v {
+ compatible = "regulator-fixed";
+ regulator-name = "3P3V";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ reg_vddio_sd0: regulator-vddio-sd0 {
+ compatible = "regulator-fixed";
+ regulator-name = "vddio-sd0";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio3 28 0>;
+ };
+
+ reg_fec_3v3: regulator-fec-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "fec-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio2 15 0>;
+ };
+
+ reg_usb0_vbus: regulator-usb0-vbus {
+ compatible = "regulator-fixed";
+ regulator-name = "usb0_vbus";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&gpio3 9 0>;
+ enable-active-high;
+ };
+
+ reg_usb1_vbus: regulator-usb1-vbus {
+ compatible = "regulator-fixed";
+ regulator-name = "usb1_vbus";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&gpio3 8 0>;
+ enable-active-high;
+ };
+
+ reg_lcd_3v3: regulator-lcd-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "lcd-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio3 30 0>;
+ enable-active-high;
+ };
+
+ reg_can_3v3: regulator-can-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "can-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio2 13 0>;
+ enable-active-high;
+ };
+
+ reg_lcd_5v: regulator-lcd-5v {
+ compatible = "regulator-fixed";
+ regulator-name = "lcd-5v";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+
+ panel {
+ compatible = "sii,43wvf1g";
+ backlight = <&backlight_display>;
+ dvdd-supply = <&reg_lcd_3v3>;
+ avdd-supply = <&reg_lcd_5v>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&display_out>;
+ };
+ };
+ };
+
apb@80000000 {
apbh@80000000 {
gpmi-nand@8000c000 {
@@ -116,31 +197,11 @@
pinctrl-names = "default";
pinctrl-0 = <&lcdif_24bit_pins_a
&lcdif_pins_evk>;
- lcd-supply = <&reg_lcd_3v3>;
- display = <&display0>;
status = "okay";
- display0: display0 {
- bits-per-pixel = <32>;
- bus-width = <24>;
-
- display-timings {
- native-mode = <&timing0>;
- timing0: timing0 {
- clock-frequency = <33500000>;
- hactive = <800>;
- vactive = <480>;
- hback-porch = <89>;
- hfront-porch = <164>;
- vback-porch = <23>;
- vfront-porch = <10>;
- hsync-len = <10>;
- vsync-len = <10>;
- hsync-active = <0>;
- vsync-active = <0>;
- de-active = <1>;
- pixelclk-active = <0>;
- };
+ port {
+ display_out: endpoint {
+ remote-endpoint = <&panel_in>;
};
};
};
@@ -269,80 +330,6 @@
};
};
- regulators {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- reg_3p3v: regulator@0 {
- compatible = "regulator-fixed";
- reg = <0>;
- regulator-name = "3P3V";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- regulator-always-on;
- };
-
- reg_vddio_sd0: regulator@1 {
- compatible = "regulator-fixed";
- reg = <1>;
- regulator-name = "vddio-sd0";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- gpio = <&gpio3 28 0>;
- };
-
- reg_fec_3v3: regulator@2 {
- compatible = "regulator-fixed";
- reg = <2>;
- regulator-name = "fec-3v3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- gpio = <&gpio2 15 0>;
- };
-
- reg_usb0_vbus: regulator@3 {
- compatible = "regulator-fixed";
- reg = <3>;
- regulator-name = "usb0_vbus";
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
- gpio = <&gpio3 9 0>;
- enable-active-high;
- };
-
- reg_usb1_vbus: regulator@4 {
- compatible = "regulator-fixed";
- reg = <4>;
- regulator-name = "usb1_vbus";
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
- gpio = <&gpio3 8 0>;
- enable-active-high;
- };
-
- reg_lcd_3v3: regulator@5 {
- compatible = "regulator-fixed";
- reg = <5>;
- regulator-name = "lcd-3v3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- gpio = <&gpio3 30 0>;
- enable-active-high;
- };
-
- reg_can_3v3: regulator@6 {
- compatible = "regulator-fixed";
- reg = <6>;
- regulator-name = "can-3v3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- gpio = <&gpio2 13 0>;
- enable-active-high;
- };
-
- };
-
sound {
compatible = "fsl,imx28-evk-sgtl5000",
"fsl,mxs-audio-sgtl5000";
@@ -363,7 +350,7 @@
};
};
- backlight {
+ backlight_display: backlight {
compatible = "pwm-backlight";
pwms = <&pwm 2 5000000>;
brightness-levels = <0 4 8 16 32 64 128 255>;
diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index 7cbc2ffa4b3a..7234e8330a57 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi
@@ -126,10 +126,14 @@
interrupt-names = "msi";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
- interrupt-map = <0 0 0 1 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
- <0 0 0 2 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
- <0 0 0 3 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>,
- <0 0 0 4 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+ /*
+ * Reference manual lists pci irqs incorrectly
+ * Real hardware ordering is same as imx6: D+MSI, C, B, A
+ */
+ interrupt-map = <0 0 0 1 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 2 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 3 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 4 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_PCIE_CTRL_ROOT_CLK>,
<&clks IMX7D_PLL_ENET_MAIN_100M_CLK>,
<&clks IMX7D_PCIE_PHY_ROOT_CLK>;
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index 12d6822f0057..04758a2a87f0 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -354,7 +354,7 @@
&mmc2 {
vmmc-supply = <&vsdio>;
bus-width = <8>;
- non-removable;
+ ti,non-removable;
};
&mmc3 {
@@ -621,15 +621,6 @@
OMAP4_IOPAD(0x10c, PIN_INPUT | MUX_MODE1) /* abe_mcbsp3_fsx */
>;
};
-};
-
-&omap4_pmx_wkup {
- usb_gpio_mux_sel2: pinmux_usb_gpio_mux_sel2_pins {
- /* gpio_wk0 */
- pinctrl-single,pins = <
- OMAP4_IOPAD(0x040, PIN_OUTPUT_PULLDOWN | MUX_MODE3)
- >;
- };
vibrator_direction_pin: pinmux_vibrator_direction_pin {
pinctrl-single,pins = <
@@ -644,6 +635,15 @@
};
};
+&omap4_pmx_wkup {
+ usb_gpio_mux_sel2: pinmux_usb_gpio_mux_sel2_pins {
+ /* gpio_wk0 */
+ pinctrl-single,pins = <
+ OMAP4_IOPAD(0x040, PIN_OUTPUT_PULLDOWN | MUX_MODE3)
+ >;
+ };
+};
+
/*
* As uart1 is wired to mdm6600 with rts and cts, we can use the cts pin for
* uart1 wakeirq.
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index e2c127608bcc..7eca43ff69bb 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -257,6 +257,7 @@ CONFIG_IMX_IPUV3_CORE=y
CONFIG_DRM=y
CONFIG_DRM_PANEL_LVDS=y
CONFIG_DRM_PANEL_SIMPLE=y
+CONFIG_DRM_PANEL_SEIKO_43WVF1G=y
CONFIG_DRM_DW_HDMI_AHB_AUDIO=m
CONFIG_DRM_DW_HDMI_CEC=y
CONFIG_DRM_IMX=y
diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
index 148226e36152..7b8212857535 100644
--- a/arch/arm/configs/mxs_defconfig
+++ b/arch/arm/configs/mxs_defconfig
@@ -95,6 +95,7 @@ CONFIG_MFD_MXS_LRADC=y
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_DRM=y
+CONFIG_DRM_PANEL_SEIKO_43WVF1G=y
CONFIG_DRM_MXSFB=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_BACKLIGHT_LCD_SUPPORT=y
diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig
index df68dc4056e5..5282324c7cef 100644
--- a/arch/arm/configs/versatile_defconfig
+++ b/arch/arm/configs/versatile_defconfig
@@ -5,19 +5,19 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_PARTITION_ADVANCED=y
# CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_VERSATILE=y
CONFIG_AEABI=y
CONFIG_OABI_COMPAT=y
-CONFIG_CMA=y
CONFIG_ZBOOT_ROM_TEXT=0x0
CONFIG_ZBOOT_ROM_BSS=0x0
CONFIG_CMDLINE="root=1f03 mem=32M"
CONFIG_FPE_NWFPE=y
CONFIG_VFP=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_CMA=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -59,6 +59,7 @@ CONFIG_GPIO_PL061=y
CONFIG_DRM=y
CONFIG_DRM_PANEL_ARM_VERSATILE=y
CONFIG_DRM_PANEL_SIMPLE=y
+CONFIG_DRM_DUMB_VGA_DAC=y
CONFIG_DRM_PL111=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_BACKLIGHT_LCD_SUPPORT=y
@@ -89,9 +90,10 @@ CONFIG_NFSD=y
CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_850=m
CONFIG_NLS_ISO8859_1=m
+CONFIG_FONTS=y
+CONFIG_FONT_ACORN_8x8=y
+CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_USER=y
CONFIG_DEBUG_LL=y
-CONFIG_FONTS=y
-CONFIG_FONT_ACORN_8x8=y
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 2ceffd85dd3d..cd65ea4e9c54 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -2161,6 +2161,37 @@ static int of_dev_hwmod_lookup(struct device_node *np,
}
/**
+ * omap_hwmod_fix_mpu_rt_idx - fix up mpu_rt_idx register offsets
+ *
+ * @oh: struct omap_hwmod *
+ * @np: struct device_node *
+ *
+ * Fix up module register offsets for modules with mpu_rt_idx.
+ * Only needed for cpsw with interconnect target module defined
+ * in device tree while still using legacy hwmod platform data
+ * for rev, sysc and syss registers.
+ *
+ * Can be removed when all cpsw hwmod platform data has been
+ * dropped.
+ */
+static void omap_hwmod_fix_mpu_rt_idx(struct omap_hwmod *oh,
+ struct device_node *np,
+ struct resource *res)
+{
+ struct device_node *child = NULL;
+ int error;
+
+ child = of_get_next_child(np, child);
+ if (!child)
+ return;
+
+ error = of_address_to_resource(child, oh->mpu_rt_idx, res);
+ if (error)
+ pr_err("%s: error mapping mpu_rt_idx: %i\n",
+ __func__, error);
+}
+
+/**
* omap_hwmod_parse_module_range - map module IO range from device tree
* @oh: struct omap_hwmod *
* @np: struct device_node *
@@ -2220,7 +2251,13 @@ int omap_hwmod_parse_module_range(struct omap_hwmod *oh,
size = be32_to_cpup(ranges);
pr_debug("omap_hwmod: %s %s at 0x%llx size 0x%llx\n",
- oh->name, np->name, base, size);
+ oh ? oh->name : "", np->name, base, size);
+
+ if (oh && oh->mpu_rt_idx) {
+ omap_hwmod_fix_mpu_rt_idx(oh, np, res);
+
+ return 0;
+ }
res->start = base;
res->end = base + size - 1;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 29e75b47becd..1b1a0e95c751 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -763,7 +763,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
config HOLES_IN_ZONE
def_bool y
- depends on NUMA
source kernel/Kconfig.hz
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f67e8d5e93ad..db8d364f8476 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -38,6 +38,7 @@ CONFIG_ARCH_BCM_IPROC=y
CONFIG_ARCH_BERLIN=y
CONFIG_ARCH_BRCMSTB=y
CONFIG_ARCH_EXYNOS=y
+CONFIG_ARCH_K3=y
CONFIG_ARCH_LAYERSCAPE=y
CONFIG_ARCH_LG1K=y
CONFIG_ARCH_HISI=y
@@ -605,6 +606,8 @@ CONFIG_ARCH_TEGRA_132_SOC=y
CONFIG_ARCH_TEGRA_210_SOC=y
CONFIG_ARCH_TEGRA_186_SOC=y
CONFIG_ARCH_TEGRA_194_SOC=y
+CONFIG_ARCH_K3_AM6_SOC=y
+CONFIG_SOC_TI=y
CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
CONFIG_EXTCON_USB_GPIO=y
CONFIG_EXTCON_USBC_CROS_EC=y
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 6e9f33d14930..067d8937d5af 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -417,7 +417,7 @@ static int gcm_encrypt(struct aead_request *req)
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
- while (walk.nbytes >= AES_BLOCK_SIZE) {
+ while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *dst = walk.dst.virt.addr;
u8 *src = walk.src.virt.addr;
@@ -437,11 +437,18 @@ static int gcm_encrypt(struct aead_request *req)
NULL);
err = skcipher_walk_done(&walk,
- walk.nbytes % AES_BLOCK_SIZE);
+ walk.nbytes % (2 * AES_BLOCK_SIZE));
}
- if (walk.nbytes)
+ if (walk.nbytes) {
__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
nrounds);
+ if (walk.nbytes > AES_BLOCK_SIZE) {
+ crypto_inc(iv, AES_BLOCK_SIZE);
+ __aes_arm64_encrypt(ctx->aes_key.key_enc,
+ ks + AES_BLOCK_SIZE, iv,
+ nrounds);
+ }
+ }
}
/* handle the tail */
@@ -545,7 +552,7 @@ static int gcm_decrypt(struct aead_request *req)
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
- while (walk.nbytes >= AES_BLOCK_SIZE) {
+ while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *dst = walk.dst.virt.addr;
u8 *src = walk.src.virt.addr;
@@ -564,11 +571,21 @@ static int gcm_decrypt(struct aead_request *req)
} while (--blocks > 0);
err = skcipher_walk_done(&walk,
- walk.nbytes % AES_BLOCK_SIZE);
+ walk.nbytes % (2 * AES_BLOCK_SIZE));
}
- if (walk.nbytes)
+ if (walk.nbytes) {
+ if (walk.nbytes > AES_BLOCK_SIZE) {
+ u8 *iv2 = iv + AES_BLOCK_SIZE;
+
+ memcpy(iv2, iv, AES_BLOCK_SIZE);
+ crypto_inc(iv2, AES_BLOCK_SIZE);
+
+ __aes_arm64_encrypt(ctx->aes_key.key_enc, iv2,
+ iv2, nrounds);
+ }
__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
nrounds);
+ }
}
/* handle the tail */
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index b7fb5274b250..0c4fc223f225 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -69,5 +69,5 @@ static void __exit sm4_ce_mod_fini(void)
crypto_unregister_alg(&sm4_ce_alg);
}
-module_cpu_feature_match(SM3, sm4_ce_mod_init);
+module_cpu_feature_match(SM4, sm4_ce_mod_init);
module_exit(sm4_ce_mod_fini);
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 3534aa6a4dc2..1b083c500b9a 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -98,11 +98,10 @@ static time64_t pmu_read_time(void)
if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
return 0;
- while (!req.complete)
- pmu_poll();
+ pmu_wait_complete(&req);
- time = (u32)((req.reply[1] << 24) | (req.reply[2] << 16) |
- (req.reply[3] << 8) | req.reply[4]);
+ time = (u32)((req.reply[0] << 24) | (req.reply[1] << 16) |
+ (req.reply[2] << 8) | req.reply[3]);
return time - RTC_OFFSET;
}
@@ -116,8 +115,7 @@ static void pmu_write_time(time64_t time)
(data >> 24) & 0xFF, (data >> 16) & 0xFF,
(data >> 8) & 0xFF, data & 0xFF) < 0)
return;
- while (!req.complete)
- pmu_poll();
+ pmu_wait_complete(&req);
}
static __u8 pmu_read_pram(int offset)
diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug
index 7a49f0d28d14..f1da8a7b17ff 100644
--- a/arch/nios2/Kconfig.debug
+++ b/arch/nios2/Kconfig.debug
@@ -3,15 +3,6 @@
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config DEBUG_STACK_USAGE
- bool "Enable stack utilization instrumentation"
- depends on DEBUG_KERNEL
- help
- Enables the display of the minimum amount of free stack which each
- task has ever had available in the sysrq-T and sysrq-P debug output.
-
- This option will slow down process creation somewhat.
-
config EARLY_PRINTK
bool "Activate early kernel debugging"
default y
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index db0b6eebbfa5..a80669209155 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -177,7 +177,6 @@ config PPC
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
- select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JIT if !PPC64
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index c229509288ea..439dc7072e05 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -14,6 +14,10 @@
#ifndef _ASM_RISCV_TLB_H
#define _ASM_RISCV_TLB_H
+struct mmu_gather;
+
+static void tlb_flush(struct mmu_gather *tlb);
+
#include <asm-generic/tlb.h>
static inline void tlb_flush(struct mmu_gather *tlb)
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 568026ccf6e8..fb03a4482ad6 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -65,24 +65,11 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
uintptr_t, flags)
{
-#ifdef CONFIG_SMP
- struct mm_struct *mm = current->mm;
- bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
-#endif
-
/* Check the reserved flags. */
if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
return -EINVAL;
- /*
- * Without CONFIG_SMP flush_icache_mm is a just a flush_icache_all(),
- * which generates unused variable warnings all over this function.
- */
-#ifdef CONFIG_SMP
- flush_icache_mm(mm, local);
-#else
- flush_icache_all();
-#endif
+ flush_icache_mm(current->mm, flags & SYS_RISCV_FLUSH_ICACHE_LOCAL);
return 0;
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c5ff296bc5d1..1a0be022f91d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2843,7 +2843,7 @@ config X86_SYSFB
This option, if enabled, marks VGA/VBE/EFI framebuffers as generic
framebuffers so the new generic system-framebuffer drivers can be
used on x86. If the framebuffer is not compatible with the generic
- modes, it is adverticed as fallback platform framebuffer so legacy
+ modes, it is advertised as fallback platform framebuffer so legacy
drivers like efifb, vesafb and uvesafb can pick it up.
If this option is not selected, all system framebuffers are always
marked as fallback platform framebuffers as usual.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 94859241bc3e..8f6e7eb8ae9f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -175,22 +175,6 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
endif
endif
-ifndef CC_HAVE_ASM_GOTO
- $(error Compiler lacks asm-goto support.)
-endif
-
-#
-# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
-# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way
-# to test for this bug at compile-time because the test case needs to execute,
-# which is a no-go for cross compilers. So check the GCC version instead.
-#
-ifdef CONFIG_JUMP_LABEL
- ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1)
- ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1)
- endif
-endif
-
ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
# This compiler flag is not supported by Clang:
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
@@ -312,6 +296,13 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
+archprepare: checkbin
+checkbin:
+ifndef CC_HAVE_ASM_GOTO
+ @echo Compiler lacks asm-goto support.
+ @exit 1
+endif
+
archclean:
$(Q)rm -rf $(objtree)/arch/i386
$(Q)rm -rf $(objtree)/arch/x86_64
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 9bd139569b41..cb2deb61c5d9 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -223,34 +223,34 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
pcmpeqd TWOONE(%rip), \TMP2
pand POLY(%rip), \TMP2
pxor \TMP2, \TMP3
- movdqa \TMP3, HashKey(%arg2)
+ movdqu \TMP3, HashKey(%arg2)
movdqa \TMP3, \TMP5
pshufd $78, \TMP3, \TMP1
pxor \TMP3, \TMP1
- movdqa \TMP1, HashKey_k(%arg2)
+ movdqu \TMP1, HashKey_k(%arg2)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^2<<1 (mod poly)
- movdqa \TMP5, HashKey_2(%arg2)
+ movdqu \TMP5, HashKey_2(%arg2)
# HashKey_2 = HashKey^2<<1 (mod poly)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_2_k(%arg2)
+ movdqu \TMP1, HashKey_2_k(%arg2)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_3(%arg2)
+ movdqu \TMP5, HashKey_3(%arg2)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_3_k(%arg2)
+ movdqu \TMP1, HashKey_3_k(%arg2)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_4(%arg2)
+ movdqu \TMP5, HashKey_4(%arg2)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_4_k(%arg2)
+ movdqu \TMP1, HashKey_4_k(%arg2)
.endm
# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
@@ -271,7 +271,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
- movdqa HashKey(%arg2), %xmm13
+ movdqu HashKey(%arg2), %xmm13
CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
%xmm4, %xmm5, %xmm6
@@ -997,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%arg2), \TMP5
+ movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
@@ -1016,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%arg2), \TMP5
+ movdqu HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
@@ -1031,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
- movdqa HashKey_3(%arg2), \TMP5
+ movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
@@ -1044,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%arg2), \TMP5
+ movdqu HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
@@ -1058,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
- movdqa HashKey_2(%arg2), \TMP5
+ movdqu HashKey_2(%arg2), \TMP5
# Multiply TMP5 * HashKey using karatsuba
@@ -1074,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%arg2), \TMP5
+ movdqu HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
@@ -1092,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
- movdqa HashKey(%arg2), \TMP5
+ movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
@@ -1121,7 +1121,7 @@ aes_loop_par_enc_done\@:
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%arg2), \TMP5
+ movdqu HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
@@ -1205,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%arg2), \TMP5
+ movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
@@ -1224,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%arg2), \TMP5
+ movdqu HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
@@ -1239,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
- movdqa HashKey_3(%arg2), \TMP5
+ movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
@@ -1252,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%arg2), \TMP5
+ movdqu HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
@@ -1266,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
- movdqa HashKey_2(%arg2), \TMP5
+ movdqu HashKey_2(%arg2), \TMP5
# Multiply TMP5 * HashKey using karatsuba
@@ -1282,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%arg2), \TMP5
+ movdqu HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
@@ -1300,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
- movdqa HashKey(%arg2), \TMP5
+ movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
@@ -1329,7 +1329,7 @@ aes_loop_par_dec_done\@:
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%arg2), \TMP5
+ movdqu HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
@@ -1405,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM1, \TMP6
pshufd $78, \XMM1, \TMP2
pxor \XMM1, \TMP2
- movdqa HashKey_4(%arg2), \TMP5
+ movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
- movdqa HashKey_4_k(%arg2), \TMP4
+ movdqu HashKey_4_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqa \XMM1, \XMMDst
movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
@@ -1418,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM2, \TMP1
pshufd $78, \XMM2, \TMP2
pxor \XMM2, \TMP2
- movdqa HashKey_3(%arg2), \TMP5
+ movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
- movdqa HashKey_3_k(%arg2), \TMP4
+ movdqu HashKey_3_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM2, \XMMDst
@@ -1433,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM3, \TMP1
pshufd $78, \XMM3, \TMP2
pxor \XMM3, \TMP2
- movdqa HashKey_2(%arg2), \TMP5
+ movdqu HashKey_2(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
- movdqa HashKey_2_k(%arg2), \TMP4
+ movdqu HashKey_2_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM3, \XMMDst
@@ -1446,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM4, \TMP1
pshufd $78, \XMM4, \TMP2
pxor \XMM4, \TMP2
- movdqa HashKey(%arg2), \TMP5
+ movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
- movdqa HashKey_k(%arg2), \TMP4
+ movdqu HashKey_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM4, \XMMDst
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
perf_callchain_store(entry, regs->ip);
- if (!current->mm)
+ if (!nmi_uaccess_okay())
return;
if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c14f2a74b2be..15450a675031 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void)
return flags;
}
-static inline void native_restore_fl(unsigned long flags)
+extern inline void native_restore_fl(unsigned long flags);
+extern inline void native_restore_fl(unsigned long flags)
{
asm volatile("push %0 ; popf"
: /* no output */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index a564084c6141..f8b1ad2c3828 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
#define _ASM_X86_PGTABLE_3LEVEL_H
+#include <asm/atomic64_32.h>
+
/*
* Intel Physical Address Extension (PAE) Mode - three-level page
* tables on PPro+ CPUs.
@@ -150,10 +152,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
pte_t res;
- /* xchg acts as a barrier before the setting of the high bits */
- res.pte_low = xchg(&ptep->pte_low, 0);
- res.pte_high = ptep->pte_high;
- ptep->pte_high = 0;
+ res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
return res;
}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c24297268ebc..d53c54b842da 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,8 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
+ /* Address space bits used by the cache internally */
+ u8 x86_cache_bits;
unsigned initialized : 1;
} __randomize_layout;
@@ -183,7 +185,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
static inline unsigned long long l1tf_pfn_limit(void)
{
- return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+ return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
extern void early_cpu_init(void);
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 5f9012ff52ed..33d3c88a7225 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -39,6 +39,7 @@ extern void do_signal(struct pt_regs *regs);
#define __ARCH_HAS_SA_RESTORER
+#include <asm/asm.h>
#include <uapi/asm/sigcontext.h>
#ifdef __i386__
@@ -86,9 +87,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
static inline int __gen_sigismember(sigset_t *set, int _sig)
{
- unsigned char ret;
- asm("btl %2,%1\n\tsetc %0"
- : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+ bool ret;
+ asm("btl %2,%1" CC_SET(c)
+ : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
return ret;
}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index b6dc698f992a..f335aad404a4 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
return (unsigned long)frame;
}
-void show_opcodes(u8 *rip, const char *loglvl);
+void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..58ce5288878e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,8 +175,16 @@ struct tlb_state {
* are on. This means that it may not match current->active_mm,
* which will contain the previous user mm when we're in lazy TLB
* mode even if we've already switched back to swapper_pg_dir.
+ *
+ * During switch_mm_irqs_off(), loaded_mm will be set to
+ * LOADED_MM_SWITCHING during the brief interrupts-off window
+ * when CR3 and loaded_mm would otherwise be inconsistent. This
+ * is for nmi_uaccess_okay()'s benefit.
*/
struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
u16 loaded_mm_asid;
u16 next_asid;
/* last user mm's ctx id */
@@ -246,6 +254,38 @@ struct tlb_state {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ struct mm_struct *current_mm = current->mm;
+
+ VM_WARN_ON_ONCE(!loaded_mm);
+
+ /*
+ * The condition we want to check is
+ * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+ * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+ * is supposed to be reasonably fast.
+ *
+ * Instead, we check the almost equivalent but somewhat conservative
+ * condition below, and we rely on the fact that switch_mm_irqs_off()
+ * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+ */
+ if (loaded_mm != current_mm)
+ return false;
+
+ VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+
+ return true;
+}
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9f0449..53748541c487 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[p],%[seg]",
+ alternative_io ("lsl %[seg],%[p]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 014f214da581..b9d5e7c9ef43 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -684,8 +684,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
* It means the size must be writable atomically and the address must be aligned
* in a way that permits an atomic write. It also makes sure we fit on a single
* page.
- *
- * Note: Must be called under text_mutex.
*/
void *text_poke(void *addr, const void *opcode, size_t len)
{
@@ -700,6 +698,8 @@ void *text_poke(void *addr, const void *opcode, size_t len)
*/
BUG_ON(!after_bootmem);
+ lockdep_assert_held(&text_mutex);
+
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -782,8 +782,6 @@ int poke_int3_handler(struct pt_regs *regs)
* - replace the first byte (int3) by the first byte of
* replacing opcode
* - sync cores
- *
- * Note: must be called under text_mutex.
*/
void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
@@ -792,6 +790,9 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
bp_int3_handler = handler;
bp_int3_addr = (u8 *)addr + sizeof(int3);
bp_patching_in_progress = true;
+
+ lockdep_assert_held(&text_mutex);
+
/*
* Corresponding read barrier in int3 notifier for making sure the
* in_progress and handler are correctly ordered wrt. patching.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c2313d0b9ca..40bdaea97fe7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -668,6 +668,45 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation);
enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+ if (c->x86 != 6)
+ return;
+
+ switch (c->x86_model) {
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ if (c->x86_cache_bits < 44)
+ c->x86_cache_bits = 44;
+ break;
+ }
+}
+
static void __init l1tf_select_mitigation(void)
{
u64 half_pa;
@@ -675,6 +714,8 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ override_cache_bits(&boot_cpu_data);
+
switch (l1tf_mitigation) {
case L1TF_MITIGATION_OFF:
case L1TF_MITIGATION_FLUSH_NOWARN:
@@ -694,11 +735,6 @@ static void __init l1tf_select_mitigation(void)
return;
#endif
- /*
- * This is extremely unlikely to happen because almost all
- * systems have far more MAX_PA/2 than RAM can be fit into
- * DIMM slots.
- */
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 84dee5ab745a..44c4ef3d989b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -919,6 +919,7 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
c->x86_phys_bits = 36;
#endif
+ c->x86_cache_bits = c->x86_phys_bits;
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 401e8c133108..fc3c07fe7df5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -150,6 +150,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
return false;
+ if (c->x86 != 6)
+ return false;
+
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
c->x86_stepping == spectre_bad_microcodes[i].stepping)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9c8652974f8e..f56895106ccf 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,6 +17,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/kasan.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
@@ -89,14 +90,24 @@ static void printk_stack_address(unsigned long address, int reliable,
* Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
* guesstimate in attempt to achieve all of the above.
*/
-void show_opcodes(u8 *rip, const char *loglvl)
+void show_opcodes(struct pt_regs *regs, const char *loglvl)
{
#define PROLOGUE_SIZE 42
#define EPILOGUE_SIZE 21
#define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
u8 opcodes[OPCODE_BUFSIZE];
+ unsigned long prologue = regs->ip - PROLOGUE_SIZE;
+ bool bad_ip;
- if (probe_kernel_read(opcodes, rip - PROLOGUE_SIZE, OPCODE_BUFSIZE)) {
+ /*
+ * Make sure userspace isn't trying to trick us into dumping kernel
+ * memory by pointing the userspace instruction pointer at it.
+ */
+ bad_ip = user_mode(regs) &&
+ __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
+
+ if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue,
+ OPCODE_BUFSIZE)) {
printk("%sCode: Bad RIP value.\n", loglvl);
} else {
printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
@@ -112,7 +123,7 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
#else
printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
#endif
- show_opcodes((u8 *)regs->ip, loglvl);
+ show_opcodes(regs, loglvl);
}
void show_iret_regs(struct pt_regs *regs)
@@ -346,7 +357,10 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
* We're not going to return, but we might be on an IST stack or
* have very little stack space left. Rewind the stack and kill
* the task.
+ * Before we rewind the stack, we have to tell KASAN that we're going to
+ * reuse the task stack and that existing poisons are invalid.
*/
+ kasan_unpoison_task_stack(current);
rewind_stack_do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <asm/tlbflush.h>
+
/*
* We rely on the nested NMI work to allow atomic faults from the NMI path; the
* nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
if (__range_not_ok(from, n, TASK_SIZE))
return n;
+ if (!nmi_uaccess_okay())
+ return n;
+
/*
* Even though this function is typically called from NMI/IRQ context
* disable pagefaults so that its behaviour is consistent even when
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b9123c497e0a..47bebfe6efa7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -837,7 +837,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "\n");
- show_opcodes((u8 *)regs->ip, loglvl);
+ show_opcodes(regs, loglvl);
}
static void
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8d6c34fe49be..51a5a69ecac9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1420,6 +1420,29 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
return 0;
}
+/*
+ * Machine check recovery code needs to change cache mode of poisoned
+ * pages to UC to avoid speculative access logging another error. But
+ * passing the address of the 1:1 mapping to set_memory_uc() is a fine
+ * way to encourage a speculative access. So we cheat and flip the top
+ * bit of the address. This works fine for the code that updates the
+ * page tables. But at the end of the process we need to flush the cache
+ * and the non-canonical address causes a #GP fault when used by the
+ * CLFLUSH instruction.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long make_addr_canonical_again(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+ return (long)(addr << 1) >> 1;
+#else
+ return addr;
+#endif
+}
+
+
static int change_page_attr_set_clr(unsigned long *addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr,
int force_split, int in_flag,
@@ -1465,7 +1488,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
* Save address for cache flush. *addr is modified in the call
* to __change_page_attr_set_clr() below.
*/
- baddr = *addr;
+ baddr = make_addr_canonical_again(*addr);
}
/* Must avoid aliasing mappings in the highmem code */
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 31341ae7309f..c1fc1ae6b429 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -248,7 +248,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
*
* Returns a pointer to a PTE on success, or NULL on failure.
*/
-static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
pmd_t *pmd;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9517d1b2a281..e96b99eb800c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -305,6 +305,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ /* Let nmi_uaccess_okay() know that we're changing CR3. */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -335,6 +339,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+ /* Make sure we write CR3 before loaded_mm. */
+ barrier();
+
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 324b93328b37..05ca14222463 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -85,14 +85,10 @@ pgd_t * __init efi_call_phys_prolog(void)
void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
- struct desc_ptr gdt_descr;
-
- gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
- gdt_descr.size = GDT_SIZE - 1;
- load_gdt(&gdt_descr);
-
load_cr3(save_pgd);
__flush_tlb_all();
+
+ load_fixmap_gdt(0);
}
void __init efi_runtime_update_mappings(void)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 45b700ac5fe7..2fe5c9b1816b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -435,14 +435,13 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
trace_xen_mmu_set_pte_atomic(ptep, pte);
- set_64bit((u64 *)ptep, native_pte_val(pte));
+ __xen_set_pte(ptep, pte);
}
static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
trace_xen_mmu_pte_clear(mm, addr, ptep);
- if (!xen_batched_set_pte(ptep, native_make_pte(0)))
- native_pte_clear(mm, addr, ptep);
+ __xen_set_pte(ptep, native_make_pte(0));
}
static void xen_pmd_clear(pmd_t *pmdp)
@@ -1570,7 +1569,7 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
pte_val_ma(pte));
#endif
- native_set_pte(ptep, pte);
+ __xen_set_pte(ptep, pte);
}
/* Early in boot, while setting up the initial pagetable, assume
@@ -2061,7 +2060,6 @@ void __init xen_relocate_p2m(void)
pud_t *pud;
pgd_t *pgd;
unsigned long *new_p2m;
- int save_pud;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
@@ -2091,7 +2089,6 @@ void __init xen_relocate_p2m(void)
pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
- save_pud = n_pud;
for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
pud = early_memremap(pud_phys, PAGE_SIZE);
clear_page(pud);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 84507d3e9a98..8e20a0677dcf 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -123,16 +123,11 @@ static void rwb_wake_all(struct rq_wb *rwb)
}
}
-static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
+static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
+ enum wbt_flags wb_acct)
{
- struct rq_wb *rwb = RQWB(rqos);
- struct rq_wait *rqw;
int inflight, limit;
- if (!(wb_acct & WBT_TRACKED))
- return;
-
- rqw = get_rq_wait(rwb, wb_acct);
inflight = atomic_dec_return(&rqw->inflight);
/*
@@ -166,10 +161,22 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
int diff = limit - inflight;
if (!inflight || diff >= rwb->wb_background / 2)
- wake_up(&rqw->wait);
+ wake_up_all(&rqw->wait);
}
}
+static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
+{
+ struct rq_wb *rwb = RQWB(rqos);
+ struct rq_wait *rqw;
+
+ if (!(wb_acct & WBT_TRACKED))
+ return;
+
+ rqw = get_rq_wait(rwb, wb_acct);
+ wbt_rqw_done(rwb, rqw, wb_acct);
+}
+
/*
* Called on completion of a request. Note that it's also called when
* a request is merged, when the request gets freed.
@@ -481,6 +488,34 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
return limit;
}
+struct wbt_wait_data {
+ struct wait_queue_entry wq;
+ struct task_struct *task;
+ struct rq_wb *rwb;
+ struct rq_wait *rqw;
+ unsigned long rw;
+ bool got_token;
+};
+
+static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+ int wake_flags, void *key)
+{
+ struct wbt_wait_data *data = container_of(curr, struct wbt_wait_data,
+ wq);
+
+ /*
+ * If we fail to get a budget, return -1 to interrupt the wake up
+ * loop in __wake_up_common.
+ */
+ if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
+ return -1;
+
+ data->got_token = true;
+ list_del_init(&curr->entry);
+ wake_up_process(data->task);
+ return 1;
+}
+
/*
* Block if we will exceed our limit, or if we are currently waiting for
* the timer to kick off queuing again.
@@ -491,19 +526,40 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
__acquires(lock)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
- DECLARE_WAITQUEUE(wait, current);
+ struct wbt_wait_data data = {
+ .wq = {
+ .func = wbt_wake_function,
+ .entry = LIST_HEAD_INIT(data.wq.entry),
+ },
+ .task = current,
+ .rwb = rwb,
+ .rqw = rqw,
+ .rw = rw,
+ };
bool has_sleeper;
has_sleeper = wq_has_sleeper(&rqw->wait);
if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
return;
- add_wait_queue_exclusive(&rqw->wait, &wait);
+ prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
do {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (data.got_token)
+ break;
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
+ if (!has_sleeper &&
+ rq_wait_inc_below(rqw, get_limit(rwb, rw))) {
+ finish_wait(&rqw->wait, &data.wq);
+
+ /*
+ * We raced with wbt_wake_function() getting a token,
+ * which means we now have two. Put our local token
+ * and wake anyone else potentially waiting for one.
+ */
+ if (data.got_token)
+ wbt_rqw_done(rwb, rqw, wb_acct);
break;
+ }
if (lock) {
spin_unlock_irq(lock);
@@ -511,11 +567,11 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
spin_lock_irq(lock);
} else
io_schedule();
+
has_sleeper = false;
} while (1);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&rqw->wait, &wait);
+ finish_wait(&rqw->wait, &data.wq);
}
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
@@ -580,11 +636,6 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
return;
}
- if (current_is_kswapd())
- flags |= WBT_KSWAPD;
- if (bio_op(bio) == REQ_OP_DISCARD)
- flags |= WBT_DISCARD;
-
__wbt_wait(rwb, flags, bio->bi_opf, lock);
if (!blk_stat_is_active(rwb->cb))
diff --git a/block/bsg.c b/block/bsg.c
index db588add6ba6..9a442c23a715 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -37,7 +37,7 @@ struct bsg_device {
struct request_queue *queue;
spinlock_t lock;
struct hlist_node dev_list;
- atomic_t ref_count;
+ refcount_t ref_count;
char name[20];
int max_queue;
};
@@ -252,7 +252,7 @@ static int bsg_put_device(struct bsg_device *bd)
mutex_lock(&bsg_mutex);
- if (!atomic_dec_and_test(&bd->ref_count)) {
+ if (!refcount_dec_and_test(&bd->ref_count)) {
mutex_unlock(&bsg_mutex);
return 0;
}
@@ -290,7 +290,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
bd->queue = rq;
- atomic_set(&bd->ref_count, 1);
+ refcount_set(&bd->ref_count, 1);
hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
@@ -308,7 +308,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
if (bd->queue == q) {
- atomic_inc(&bd->ref_count);
+ refcount_inc(&bd->ref_count);
goto found;
}
}
diff --git a/block/elevator.c b/block/elevator.c
index 5ea6e7d600e4..6a06b5d040e5 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -895,8 +895,7 @@ int elv_register(struct elevator_type *e)
spin_lock(&elv_list_lock);
if (elevator_find(e->elevator_name, e->uses_mq)) {
spin_unlock(&elv_list_lock);
- if (e->icq_cache)
- kmem_cache_destroy(e->icq_cache);
+ kmem_cache_destroy(e->icq_cache);
return -EBUSY;
}
list_add_tail(&e->list, &elv_list);
diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c
index 5d4b72e21161..569a4a662dcd 100644
--- a/drivers/ata/pata_ftide010.c
+++ b/drivers/ata/pata_ftide010.c
@@ -256,14 +256,12 @@ static struct ata_port_operations pata_ftide010_port_ops = {
.qc_issue = ftide010_qc_issue,
};
-static struct ata_port_info ftide010_port_info[] = {
- {
- .flags = ATA_FLAG_SLAVE_POSS,
- .mwdma_mask = ATA_MWDMA2,
- .udma_mask = ATA_UDMA6,
- .pio_mask = ATA_PIO4,
- .port_ops = &pata_ftide010_port_ops,
- },
+static struct ata_port_info ftide010_port_info = {
+ .flags = ATA_FLAG_SLAVE_POSS,
+ .mwdma_mask = ATA_MWDMA2,
+ .udma_mask = ATA_UDMA6,
+ .pio_mask = ATA_PIO4,
+ .port_ops = &pata_ftide010_port_ops,
};
#if IS_ENABLED(CONFIG_SATA_GEMINI)
@@ -349,6 +347,7 @@ static int pata_ftide010_gemini_cable_detect(struct ata_port *ap)
}
static int pata_ftide010_gemini_init(struct ftide010 *ftide,
+ struct ata_port_info *pi,
bool is_ata1)
{
struct device *dev = ftide->dev;
@@ -373,7 +372,13 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide,
/* Flag port as SATA-capable */
if (gemini_sata_bridge_enabled(sg, is_ata1))
- ftide010_port_info[0].flags |= ATA_FLAG_SATA;
+ pi->flags |= ATA_FLAG_SATA;
+
+ /* This device has broken DMA, only PIO works */
+ if (of_machine_is_compatible("itian,sq201")) {
+ pi->mwdma_mask = 0;
+ pi->udma_mask = 0;
+ }
/*
* We assume that a simple 40-wire cable is used in the PATA mode.
@@ -435,6 +440,7 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide,
}
#else
static int pata_ftide010_gemini_init(struct ftide010 *ftide,
+ struct ata_port_info *pi,
bool is_ata1)
{
return -ENOTSUPP;
@@ -446,7 +452,7 @@ static int pata_ftide010_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct device_node *np = dev->of_node;
- const struct ata_port_info pi = ftide010_port_info[0];
+ struct ata_port_info pi = ftide010_port_info;
const struct ata_port_info *ppi[] = { &pi, NULL };
struct ftide010 *ftide;
struct resource *res;
@@ -490,6 +496,7 @@ static int pata_ftide010_probe(struct platform_device *pdev)
* are ATA0. This will also set up the cable types.
*/
ret = pata_ftide010_gemini_init(ftide,
+ &pi,
(res->start == 0x63400000));
if (ret)
goto err_dis_clk;
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 8e2e4757adcb..5a42ae4078c2 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -185,7 +185,7 @@ EXPORT_SYMBOL_GPL(of_pm_clk_add_clk);
int of_pm_clk_add_clks(struct device *dev)
{
struct clk **clks;
- unsigned int i, count;
+ int i, count;
int ret;
if (!dev || !dev->of_node)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index b55b245e8052..fd1e19f1a49f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -84,6 +84,18 @@ MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
/*
+ * How long a persistent grant is allowed to remain allocated without being in
+ * use. The time is in seconds, 0 means indefinitely long.
+ */
+
+static unsigned int xen_blkif_pgrant_timeout = 60;
+module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout,
+ uint, 0644);
+MODULE_PARM_DESC(persistent_grant_unused_seconds,
+ "Time in seconds an unused persistent grant is allowed to "
+ "remain allocated. Default is 60, 0 means unlimited.");
+
+/*
* Maximum number of rings/queues blkback supports, allow as many queues as there
* are CPUs if user has not specified a value.
*/
@@ -123,6 +135,13 @@ module_param(log_stats, int, 0644);
/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10
+static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
+{
+ return xen_blkif_pgrant_timeout &&
+ (jiffies - persistent_gnt->last_used >=
+ HZ * xen_blkif_pgrant_timeout);
+}
+
static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
unsigned long flags;
@@ -236,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
}
}
- bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
- set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+ persistent_gnt->active = true;
/* Add new node and rebalance tree. */
rb_link_node(&(persistent_gnt->node), parent, new);
rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
@@ -261,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
else if (gref > data->gnt)
node = node->rb_right;
else {
- if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+ if (data->active) {
pr_alert_ratelimited("requesting a grant already in use\n");
return NULL;
}
- set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+ data->active = true;
atomic_inc(&ring->persistent_gnt_in_use);
return data;
}
@@ -276,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
static void put_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
- if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+ if (!persistent_gnt->active)
pr_alert_ratelimited("freeing a grant already unused\n");
- set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
- clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+ persistent_gnt->last_used = jiffies;
+ persistent_gnt->active = false;
atomic_dec(&ring->persistent_gnt_in_use);
}
@@ -371,26 +389,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
struct persistent_gnt *persistent_gnt;
struct rb_node *n;
unsigned int num_clean, total;
- bool scan_used = false, clean_used = false;
+ bool scan_used = false;
struct rb_root *root;
- if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
- (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
- !ring->blkif->vbd.overflow_max_grants)) {
- goto out;
- }
-
if (work_busy(&ring->persistent_purge_work)) {
pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
goto out;
}
- num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
- num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
- num_clean = min(ring->persistent_gnt_c, num_clean);
- if ((num_clean == 0) ||
- (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
- goto out;
+ if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+ (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+ !ring->blkif->vbd.overflow_max_grants)) {
+ num_clean = 0;
+ } else {
+ num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+ num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants +
+ num_clean;
+ num_clean = min(ring->persistent_gnt_c, num_clean);
+ pr_debug("Going to purge at least %u persistent grants\n",
+ num_clean);
+ }
/*
* At this point, we can assure that there will be no calls
@@ -401,9 +419,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
* number of grants.
*/
- total = num_clean;
-
- pr_debug("Going to purge %u persistent grants\n", num_clean);
+ total = 0;
BUG_ON(!list_empty(&ring->persistent_purge_list));
root = &ring->persistent_gnts;
@@ -412,46 +428,37 @@ purge_list:
BUG_ON(persistent_gnt->handle ==
BLKBACK_INVALID_HANDLE);
- if (clean_used) {
- clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+ if (persistent_gnt->active)
continue;
- }
-
- if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+ if (!scan_used && !persistent_gnt_timeout(persistent_gnt))
continue;
- if (!scan_used &&
- (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+ if (scan_used && total >= num_clean)
continue;
rb_erase(&persistent_gnt->node, root);
list_add(&persistent_gnt->remove_node,
&ring->persistent_purge_list);
- if (--num_clean == 0)
- goto finished;
+ total++;
}
/*
- * If we get here it means we also need to start cleaning
+ * Check whether we also need to start cleaning
* grants that were used since last purge in order to cope
* with the requested num
*/
- if (!scan_used && !clean_used) {
- pr_debug("Still missing %u purged frames\n", num_clean);
+ if (!scan_used && total < num_clean) {
+ pr_debug("Still missing %u purged frames\n", num_clean - total);
scan_used = true;
goto purge_list;
}
-finished:
- if (!clean_used) {
- pr_debug("Finished scanning for grants to clean, removing used flag\n");
- clean_used = true;
- goto purge_list;
- }
- ring->persistent_gnt_c -= (total - num_clean);
- ring->blkif->vbd.overflow_max_grants = 0;
+ if (total) {
+ ring->persistent_gnt_c -= total;
+ ring->blkif->vbd.overflow_max_grants = 0;
- /* We can defer this work */
- schedule_work(&ring->persistent_purge_work);
- pr_debug("Purged %u/%u\n", (total - num_clean), total);
+ /* We can defer this work */
+ schedule_work(&ring->persistent_purge_work);
+ pr_debug("Purged %u/%u\n", num_clean, total);
+ }
out:
return;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index ecb35fe8ca8d..1d3002d773f7 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -233,16 +233,6 @@ struct xen_vbd {
struct backend_info;
-/* Number of available flags */
-#define PERSISTENT_GNT_FLAGS_SIZE 2
-/* This persistent grant is currently in use */
-#define PERSISTENT_GNT_ACTIVE 0
-/*
- * This persistent grant has been used, this flag is set when we remove the
- * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
- */
-#define PERSISTENT_GNT_WAS_ACTIVE 1
-
/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS_PER_PAGE 32
@@ -250,7 +240,8 @@ struct persistent_gnt {
struct page *page;
grant_ref_t gnt;
grant_handle_t handle;
- DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
+ unsigned long last_used;
+ bool active;
struct rb_node node;
struct list_head remove_node;
};
@@ -278,7 +269,6 @@ struct xen_blkif_ring {
wait_queue_head_t pending_free_wq;
/* Tree to store persistent grants. */
- spinlock_t pers_gnts_lock;
struct rb_root persistent_gnts;
unsigned int persistent_gnt_c;
atomic_t persistent_gnt_in_use;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8986adab9bf5..a71d817e900d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -46,6 +46,7 @@
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/list.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq)
static DEFINE_MUTEX(blkfront_mutex);
static const struct block_device_operations xlvbd_block_fops;
+static struct delayed_work blkfront_work;
+static LIST_HEAD(info_list);
/*
* Maximum number of segments in indirect requests, the actual value used by
@@ -216,6 +219,7 @@ struct blkfront_info
/* Save uncomplete reqs and bios for migration. */
struct list_head requests;
struct bio_list bio_list;
+ struct list_head info_list;
};
static unsigned int nr_minors;
@@ -1759,6 +1763,12 @@ abort_transaction:
return err;
}
+static void free_info(struct blkfront_info *info)
+{
+ list_del(&info->info_list);
+ kfree(info);
+}
+
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info)
@@ -1880,7 +1890,10 @@ again:
destroy_blkring:
blkif_free(info, 0);
- kfree(info);
+ mutex_lock(&blkfront_mutex);
+ free_info(info);
+ mutex_unlock(&blkfront_mutex);
+
dev_set_drvdata(&dev->dev, NULL);
return err;
@@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev,
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
+ mutex_lock(&blkfront_mutex);
+ list_add(&info->info_list, &info_list);
+ mutex_unlock(&blkfront_mutex);
+
return 0;
}
@@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST)
indirect_segments = 0;
info->max_indirect_segments = indirect_segments;
+
+ if (info->feature_persistent) {
+ mutex_lock(&blkfront_mutex);
+ schedule_delayed_work(&blkfront_work, HZ * 10);
+ mutex_unlock(&blkfront_mutex);
+ }
}
/*
@@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
mutex_unlock(&info->mutex);
if (!bdev) {
- kfree(info);
+ mutex_lock(&blkfront_mutex);
+ free_info(info);
+ mutex_unlock(&blkfront_mutex);
return 0;
}
@@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
if (info && !bdev->bd_openers) {
xlvbd_release_gendisk(info);
disk->private_data = NULL;
- kfree(info);
+ mutex_lock(&blkfront_mutex);
+ free_info(info);
+ mutex_unlock(&blkfront_mutex);
}
mutex_unlock(&bdev->bd_mutex);
@@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
xlvbd_release_gendisk(info);
disk->private_data = NULL;
- kfree(info);
+ free_info(info);
}
out:
@@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = {
.is_ready = blkfront_is_ready,
};
+static void purge_persistent_grants(struct blkfront_info *info)
+{
+ unsigned int i;
+ unsigned long flags;
+
+ for (i = 0; i < info->nr_rings; i++) {
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
+ struct grant *gnt_list_entry, *tmp;
+
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+
+ if (rinfo->persistent_gnts_c == 0) {
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ continue;
+ }
+
+ list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
+ node) {
+ if (gnt_list_entry->gref == GRANT_INVALID_REF ||
+ gnttab_query_foreign_access(gnt_list_entry->gref))
+ continue;
+
+ list_del(&gnt_list_entry->node);
+ gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+ rinfo->persistent_gnts_c--;
+ __free_page(gnt_list_entry->page);
+ kfree(gnt_list_entry);
+ }
+
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ }
+}
+
+static void blkfront_delay_work(struct work_struct *work)
+{
+ struct blkfront_info *info;
+ bool need_schedule_work = false;
+
+ mutex_lock(&blkfront_mutex);
+
+ list_for_each_entry(info, &info_list, info_list) {
+ if (info->feature_persistent) {
+ need_schedule_work = true;
+ mutex_lock(&info->mutex);
+ purge_persistent_grants(info);
+ mutex_unlock(&info->mutex);
+ }
+ }
+
+ if (need_schedule_work)
+ schedule_delayed_work(&blkfront_work, HZ * 10);
+
+ mutex_unlock(&blkfront_mutex);
+}
+
static int __init xlblk_init(void)
{
int ret;
@@ -2626,6 +2708,15 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
+ if (!xen_has_pv_disk_devices())
+ return -ENODEV;
+
+ if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
+ pr_warn("xen_blk: can't get major %d with name %s\n",
+ XENVBD_MAJOR, DEV_NAME);
+ return -ENODEV;
+ }
+
if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
@@ -2641,14 +2732,7 @@ static int __init xlblk_init(void)
xen_blkif_max_queues = nr_cpus;
}
- if (!xen_has_pv_disk_devices())
- return -ENODEV;
-
- if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
- printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
- XENVBD_MAJOR, DEV_NAME);
- return -ENODEV;
- }
+ INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work);
ret = xenbus_register_frontend(&blkfront_driver);
if (ret) {
@@ -2663,6 +2747,8 @@ module_init(xlblk_init);
static void __exit xlblk_exit(void)
{
+ cancel_delayed_work_sync(&blkfront_work);
+
xenbus_unregister_driver(&blkfront_driver);
unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
kfree(minors);
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 2df11cc08a46..845b0314ce3a 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -200,6 +200,7 @@ config BT_HCIUART_RTL
depends on BT_HCIUART
depends on BT_HCIUART_SERDEV
depends on GPIOLIB
+ depends on ACPI
select BT_HCIUART_3WIRE
select BT_RTL
help
diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c
index ed2a5c7cb77f..4593baff2bc9 100644
--- a/drivers/bluetooth/btmtkuart.c
+++ b/drivers/bluetooth/btmtkuart.c
@@ -144,8 +144,10 @@ static int mtk_setup_fw(struct hci_dev *hdev)
fw_size = fw->size;
/* The size of patch header is 30 bytes, should be skip */
- if (fw_size < 30)
- return -EINVAL;
+ if (fw_size < 30) {
+ err = -EINVAL;
+ goto free_fw;
+ }
fw_size -= 30;
fw_ptr += 30;
@@ -172,8 +174,8 @@ static int mtk_setup_fw(struct hci_dev *hdev)
fw_ptr += dlen;
}
+free_fw:
release_firmware(fw);
-
return err;
}
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index c9bac9dc4637..e4fe954e63a9 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -498,32 +498,29 @@ static int sysc_check_registers(struct sysc *ddata)
/**
* syc_ioremap - ioremap register space for the interconnect target module
- * @ddata: deviec driver data
+ * @ddata: device driver data
*
* Note that the interconnect target module registers can be anywhere
- * within the first child device address space. For example, SGX has
- * them at offset 0x1fc00 in the 32MB module address space. We just
- * what we need around the interconnect target module registers.
+ * within the interconnect target module range. For example, SGX has
+ * them at offset 0x1fc00 in the 32MB module address space. And cpsw
+ * has them at offset 0x1200 in the CPSW_WR child. Usually the
+ * the interconnect target module registers are at the beginning of
+ * the module range though.
*/
static int sysc_ioremap(struct sysc *ddata)
{
- u32 size = 0;
-
- if (ddata->offsets[SYSC_SYSSTATUS] >= 0)
- size = ddata->offsets[SYSC_SYSSTATUS];
- else if (ddata->offsets[SYSC_SYSCONFIG] >= 0)
- size = ddata->offsets[SYSC_SYSCONFIG];
- else if (ddata->offsets[SYSC_REVISION] >= 0)
- size = ddata->offsets[SYSC_REVISION];
- else
- return -EINVAL;
+ int size;
- size &= 0xfff00;
- size += SZ_256;
+ size = max3(ddata->offsets[SYSC_REVISION],
+ ddata->offsets[SYSC_SYSCONFIG],
+ ddata->offsets[SYSC_SYSSTATUS]);
+
+ if (size < 0 || (size + sizeof(u32)) > ddata->module_size)
+ return -EINVAL;
ddata->module_va = devm_ioremap(ddata->dev,
ddata->module_pa,
- size);
+ size + sizeof(u32));
if (!ddata->module_va)
return -EIO;
@@ -1224,10 +1221,10 @@ static int sysc_child_suspend_noirq(struct device *dev)
if (!pm_runtime_status_suspended(dev)) {
error = pm_generic_runtime_suspend(dev);
if (error) {
- dev_err(dev, "%s error at %i: %i\n",
- __func__, __LINE__, error);
+ dev_warn(dev, "%s busy at %i: %i\n",
+ __func__, __LINE__, error);
- return error;
+ return 0;
}
error = sysc_runtime_suspend(ddata->dev);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 113fc6edb2b0..a5d5a96479bf 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2546,7 +2546,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi,
if (!CDROM_CAN(CDC_SELECT_DISC) ||
(arg == CDSL_CURRENT || arg == CDSL_NONE))
return cdi->ops->drive_status(cdi, CDSL_CURRENT);
- if (((int)arg >= cdi->capacity))
+ if (arg >= cdi->capacity)
return -EINVAL;
return cdrom_slot_status(cdi, arg);
}
diff --git a/drivers/clk/clk-npcm7xx.c b/drivers/clk/clk-npcm7xx.c
index 740af90a9508..c5edf8f2fd19 100644
--- a/drivers/clk/clk-npcm7xx.c
+++ b/drivers/clk/clk-npcm7xx.c
@@ -558,8 +558,8 @@ static void __init npcm7xx_clk_init(struct device_node *clk_np)
if (!clk_base)
goto npcm7xx_init_error;
- npcm7xx_clk_data = kzalloc(sizeof(*npcm7xx_clk_data->hws) *
- NPCM7XX_NUM_CLOCKS + sizeof(npcm7xx_clk_data), GFP_KERNEL);
+ npcm7xx_clk_data = kzalloc(struct_size(npcm7xx_clk_data, hws,
+ NPCM7XX_NUM_CLOCKS), GFP_KERNEL);
if (!npcm7xx_clk_data)
goto npcm7xx_init_np_err;
diff --git a/drivers/clk/x86/clk-st.c b/drivers/clk/x86/clk-st.c
index fb62f3938008..3a0996f2d556 100644
--- a/drivers/clk/x86/clk-st.c
+++ b/drivers/clk/x86/clk-st.c
@@ -46,7 +46,7 @@ static int st_clk_probe(struct platform_device *pdev)
clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents),
0, st_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, NULL);
- clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_25M]->clk);
+ clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_48M]->clk);
hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", "oscout1_mux",
0, st_data->base + MISCCLKCNTL1, OSCCLKENB,
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 110483f0e3fb..e26a40971b26 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -379,9 +379,20 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx == -1)
idx = i; /* first enabled state */
if (s->target_residency > data->predicted_us) {
- if (!tick_nohz_tick_stopped())
+ if (data->predicted_us < TICK_USEC)
break;
+ if (!tick_nohz_tick_stopped()) {
+ /*
+ * If the state selected so far is shallow,
+ * waking up early won't hurt, so retain the
+ * tick in that case and let the governor run
+ * again in the next iteration of the loop.
+ */
+ expected_interval = drv->states[idx].target_residency;
+ break;
+ }
+
/*
* If the state selected so far is shallow and this
* state's target residency matches the time till the
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 6e61cc93c2b0..d7aa7d7ff102 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -679,10 +679,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
int ret = 0;
if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) {
- crypto_ablkcipher_set_flags(ablkcipher,
- CRYPTO_TFM_RES_BAD_KEY_LEN);
dev_err(jrdev, "key size mismatch\n");
- return -EINVAL;
+ goto badkey;
}
ctx->cdata.keylen = keylen;
@@ -715,7 +713,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
return ret;
badkey:
crypto_ablkcipher_set_flags(ablkcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return 0;
+ return -EINVAL;
}
/*
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 578ea63a3109..f26d62e5533a 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -71,8 +71,8 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc,
dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
- dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
- dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+ dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL);
+ dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL);
}
static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
@@ -90,8 +90,8 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE);
dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE);
dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
- dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
- dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+ dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL);
+ dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL);
}
/* RSA Job Completion handler */
@@ -417,13 +417,13 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
goto unmap_p;
}
- pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+ pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, pdb->tmp1_dma)) {
dev_err(dev, "Unable to map RSA tmp1 memory\n");
goto unmap_q;
}
- pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+ pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, pdb->tmp2_dma)) {
dev_err(dev, "Unable to map RSA tmp2 memory\n");
goto unmap_tmp1;
@@ -451,7 +451,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
return 0;
unmap_tmp1:
- dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+ dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL);
unmap_q:
dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
unmap_p:
@@ -504,13 +504,13 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
goto unmap_dq;
}
- pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+ pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, pdb->tmp1_dma)) {
dev_err(dev, "Unable to map RSA tmp1 memory\n");
goto unmap_qinv;
}
- pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+ pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, pdb->tmp2_dma)) {
dev_err(dev, "Unable to map RSA tmp2 memory\n");
goto unmap_tmp1;
@@ -538,7 +538,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
return 0;
unmap_tmp1:
- dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+ dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL);
unmap_qinv:
dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
unmap_dq:
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index f4f258075b89..acdd72016ffe 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -190,7 +190,8 @@ static void caam_jr_dequeue(unsigned long devarg)
BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);
/* Unmap just-run descriptor so we can post-process */
- dma_unmap_single(dev, jrp->outring[hw_idx].desc,
+ dma_unmap_single(dev,
+ caam_dma_to_cpu(jrp->outring[hw_idx].desc),
jrp->entinfo[sw_idx].desc_size,
DMA_TO_DEVICE);
diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h
index 9a476bb6d4c7..af596455b420 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_dev.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h
@@ -35,6 +35,7 @@ struct nitrox_cmdq {
/* requests in backlog queues */
atomic_t backlog_count;
+ int write_idx;
/* command size 32B/64B */
u8 instr_size;
u8 qno;
@@ -87,7 +88,7 @@ struct nitrox_bh {
struct bh_data *slc;
};
-/* NITROX-5 driver state */
+/* NITROX-V driver state */
#define NITROX_UCODE_LOADED 0
#define NITROX_READY 1
diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c
index ebe267379ac9..4d31df07777f 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_lib.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c
@@ -36,6 +36,7 @@ static int cmdq_common_init(struct nitrox_cmdq *cmdq)
cmdq->head = PTR_ALIGN(cmdq->head_unaligned, PKT_IN_ALIGN);
cmdq->dma = PTR_ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN);
cmdq->qsize = (qsize + PKT_IN_ALIGN);
+ cmdq->write_idx = 0;
spin_lock_init(&cmdq->response_lock);
spin_lock_init(&cmdq->cmdq_lock);
diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index deaefd532aaa..4a362fc22f62 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -42,6 +42,16 @@
* Invalid flag options in AES-CCM IV.
*/
+static inline int incr_index(int index, int count, int max)
+{
+ if ((index + count) >= max)
+ index = index + count - max;
+ else
+ index += count;
+
+ return index;
+}
+
/**
* dma_free_sglist - unmap and free the sg lists.
* @ndev: N5 device
@@ -426,30 +436,29 @@ static void post_se_instr(struct nitrox_softreq *sr,
struct nitrox_cmdq *cmdq)
{
struct nitrox_device *ndev = sr->ndev;
- union nps_pkt_in_instr_baoff_dbell pkt_in_baoff_dbell;
- u64 offset;
+ int idx;
u8 *ent;
spin_lock_bh(&cmdq->cmdq_lock);
- /* get the next write offset */
- offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(cmdq->qno);
- pkt_in_baoff_dbell.value = nitrox_read_csr(ndev, offset);
+ idx = cmdq->write_idx;
/* copy the instruction */
- ent = cmdq->head + pkt_in_baoff_dbell.s.aoff;
+ ent = cmdq->head + (idx * cmdq->instr_size);
memcpy(ent, &sr->instr, cmdq->instr_size);
- /* flush the command queue updates */
- dma_wmb();
- sr->tstamp = jiffies;
atomic_set(&sr->status, REQ_POSTED);
response_list_add(sr, cmdq);
+ sr->tstamp = jiffies;
+ /* flush the command queue updates */
+ dma_wmb();
/* Ring doorbell with count 1 */
writeq(1, cmdq->dbell_csr_addr);
/* orders the doorbell rings */
mmiowb();
+ cmdq->write_idx = incr_index(idx, 1, ndev->qlen);
+
spin_unlock_bh(&cmdq->cmdq_lock);
}
@@ -459,6 +468,9 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq)
struct nitrox_softreq *sr, *tmp;
int ret = 0;
+ if (!atomic_read(&cmdq->backlog_count))
+ return 0;
+
spin_lock_bh(&cmdq->backlog_lock);
list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) {
@@ -466,7 +478,7 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq)
/* submit until space available */
if (unlikely(cmdq_full(cmdq, ndev->qlen))) {
- ret = -EBUSY;
+ ret = -ENOSPC;
break;
}
/* delete from backlog list */
@@ -491,23 +503,20 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr)
{
struct nitrox_cmdq *cmdq = sr->cmdq;
struct nitrox_device *ndev = sr->ndev;
- int ret = -EBUSY;
+
+ /* try to post backlog requests */
+ post_backlog_cmds(cmdq);
if (unlikely(cmdq_full(cmdq, ndev->qlen))) {
if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
- return -EAGAIN;
-
+ return -ENOSPC;
+ /* add to backlog list */
backlog_list_add(sr, cmdq);
- } else {
- ret = post_backlog_cmds(cmdq);
- if (ret) {
- backlog_list_add(sr, cmdq);
- return ret;
- }
- post_se_instr(sr, cmdq);
- ret = -EINPROGRESS;
+ return -EBUSY;
}
- return ret;
+ post_se_instr(sr, cmdq);
+
+ return -EINPROGRESS;
}
/**
@@ -624,11 +633,9 @@ int nitrox_process_se_request(struct nitrox_device *ndev,
*/
sr->instr.fdata[0] = *((u64 *)&req->gph);
sr->instr.fdata[1] = 0;
- /* flush the soft_req changes before posting the cmd */
- wmb();
ret = nitrox_enqueue_request(sr);
- if (ret == -EAGAIN)
+ if (ret == -ENOSPC)
goto send_fail;
return ret;
diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h
index a53a0e6ba024..7725b6ee14ef 100644
--- a/drivers/crypto/chelsio/chtls/chtls.h
+++ b/drivers/crypto/chelsio/chtls/chtls.h
@@ -96,6 +96,10 @@ enum csk_flags {
CSK_CONN_INLINE, /* Connection on HW */
};
+enum chtls_cdev_state {
+ CHTLS_CDEV_STATE_UP = 1
+};
+
struct listen_ctx {
struct sock *lsk;
struct chtls_dev *cdev;
@@ -146,6 +150,7 @@ struct chtls_dev {
unsigned int send_page_order;
int max_host_sndbuf;
struct key_map kmap;
+ unsigned int cdev_state;
};
struct chtls_hws {
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index 9b07f9165658..f59b044ebd25 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -160,6 +160,7 @@ static void chtls_register_dev(struct chtls_dev *cdev)
tlsdev->hash = chtls_create_hash;
tlsdev->unhash = chtls_destroy_hash;
tls_register_device(&cdev->tlsdev);
+ cdev->cdev_state = CHTLS_CDEV_STATE_UP;
}
static void chtls_unregister_dev(struct chtls_dev *cdev)
@@ -281,8 +282,10 @@ static void chtls_free_all_uld(void)
struct chtls_dev *cdev, *tmp;
mutex_lock(&cdev_mutex);
- list_for_each_entry_safe(cdev, tmp, &cdev_list, list)
- chtls_free_uld(cdev);
+ list_for_each_entry_safe(cdev, tmp, &cdev_list, list) {
+ if (cdev->cdev_state == CHTLS_CDEV_STATE_UP)
+ chtls_free_uld(cdev);
+ }
mutex_unlock(&cdev_mutex);
}
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 5285ece4f33a..b71895871be3 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -107,24 +107,23 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
ret = crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
} else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
-
blkcipher_walk_init(&walk, dst, src, nbytes);
ret = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
aes_p8_cbc_encrypt(walk.src.virt.addr,
walk.dst.virt.addr,
nbytes & AES_BLOCK_MASK,
&ctx->enc_key, walk.iv, 1);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, &walk, nbytes);
}
-
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
}
return ret;
@@ -147,24 +146,23 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
ret = crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
} else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
-
blkcipher_walk_init(&walk, dst, src, nbytes);
ret = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
aes_p8_cbc_encrypt(walk.src.virt.addr,
walk.dst.virt.addr,
nbytes & AES_BLOCK_MASK,
&ctx->dec_key, walk.iv, 0);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, &walk, nbytes);
}
-
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
}
return ret;
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index 8bd9aff0f55f..e9954a7d4694 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -116,32 +116,39 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc,
ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
} else {
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+
+ ret = blkcipher_walk_virt(desc, &walk);
+
preempt_disable();
pagefault_disable();
enable_kernel_vsx();
- blkcipher_walk_init(&walk, dst, src, nbytes);
-
- ret = blkcipher_walk_virt(desc, &walk);
iv = walk.iv;
memset(tweak, 0, AES_BLOCK_SIZE);
aes_p8_encrypt(iv, tweak, &ctx->tweak_key);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
while ((nbytes = walk.nbytes)) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
if (enc)
aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak);
else
aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, &walk, nbytes);
}
-
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
}
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 502b94fb116a..b6e9df11115d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1012,13 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (r)
return r;
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
- parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
- if (!parser->ctx->preamble_presented) {
- parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
- parser->ctx->preamble_presented = true;
- }
- }
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ parser->job->preamble_status |=
+ AMDGPU_PREAMBLE_IB_PRESENT;
if (parser->ring && parser->ring != ring)
return -EINVAL;
@@ -1207,26 +1203,24 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
int r;
+ job = p->job;
+ p->job = NULL;
+
+ r = drm_sched_job_init(&job->base, entity, p->filp);
+ if (r)
+ goto error_unlock;
+
+ /* No memory allocation is allowed while holding the mn lock */
amdgpu_mn_lock(p->mn);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->robj;
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
- amdgpu_mn_unlock(p->mn);
- return -ERESTARTSYS;
+ r = -ERESTARTSYS;
+ goto error_abort;
}
}
- job = p->job;
- p->job = NULL;
-
- r = drm_sched_job_init(&job->base, entity, p->filp);
- if (r) {
- amdgpu_job_free(job);
- amdgpu_mn_unlock(p->mn);
- return r;
- }
-
job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished);
@@ -1241,6 +1235,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_cs_post_dependencies(p);
+ if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ !p->ctx->preamble_presented) {
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ p->ctx->preamble_presented = true;
+ }
+
cs->out.handle = seq;
job->uf_sequence = seq;
@@ -1258,6 +1258,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_mn_unlock(p->mn);
return 0;
+
+error_abort:
+ dma_fence_put(&job->base.s_fence->finished);
+ job->base.s_fence = NULL;
+
+error_unlock:
+ amdgpu_job_free(job);
+ amdgpu_mn_unlock(p->mn);
+ return r;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 5518e623fed2..51b5e977ca88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -164,8 +164,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return r;
}
+ need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
+ (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true;
dma_fence_put(tmp);
@@ -196,7 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
skip_preamble = ring->current_ctx == fence_ctx;
- need_ctx_switch = ring->current_ctx != fence_ctx;
if (job && ring->funcs->emit_cntxcntl) {
if (need_ctx_switch)
status |= AMDGPU_HAVE_CTX_SWITCH;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 8f98629fbe59..7b4e657a95c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1932,14 +1932,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
amdgpu_fence_wait_empty(ring);
}
- mutex_lock(&adev->pm.mutex);
- /* update battery/ac status */
- if (power_supply_is_system_supplied() > 0)
- adev->pm.ac_power = true;
- else
- adev->pm.ac_power = false;
- mutex_unlock(&adev->pm.mutex);
-
if (adev->powerplay.pp_funcs->dispatch_tasks) {
if (!amdgpu_device_has_dc_support(adev)) {
mutex_lock(&adev->pm.mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ece0ac703e27..b17771dd5ce7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -172,6 +172,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
* is validated on next vm use to avoid fault.
* */
list_move_tail(&base->vm_status, &vm->evicted);
+ base->moved = true;
}
/**
@@ -369,7 +370,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
uint64_t addr;
int r;
- addr = amdgpu_bo_gpu_offset(bo);
entries = amdgpu_bo_size(bo) / 8;
if (pte_support_ats) {
@@ -401,6 +401,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
goto error;
+ addr = amdgpu_bo_gpu_offset(bo);
if (ats_entries) {
uint64_t ats_value;
@@ -2483,28 +2484,52 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
* amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
*
* @adev: amdgpu_device pointer
- * @vm_size: the default vm size if it's set auto
+ * @min_vm_size: the minimum vm size in GB if it's set auto
* @fragment_size_default: Default PTE fragment size
* @max_level: max VMPT level
* @max_bits: max address space size in bits
*
*/
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
uint32_t fragment_size_default, unsigned max_level,
unsigned max_bits)
{
+ unsigned int max_size = 1 << (max_bits - 30);
+ unsigned int vm_size;
uint64_t tmp;
/* adjust vm size first */
if (amdgpu_vm_size != -1) {
- unsigned max_size = 1 << (max_bits - 30);
-
vm_size = amdgpu_vm_size;
if (vm_size > max_size) {
dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
amdgpu_vm_size, max_size);
vm_size = max_size;
}
+ } else {
+ struct sysinfo si;
+ unsigned int phys_ram_gb;
+
+ /* Optimal VM size depends on the amount of physical
+ * RAM available. Underlying requirements and
+ * assumptions:
+ *
+ * - Need to map system memory and VRAM from all GPUs
+ * - VRAM from other GPUs not known here
+ * - Assume VRAM <= system memory
+ * - On GFX8 and older, VM space can be segmented for
+ * different MTYPEs
+ * - Need to allow room for fragmentation, guard pages etc.
+ *
+ * This adds up to a rough guess of system memory x3.
+ * Round up to power of two to maximize the available
+ * VM size with the given page table size.
+ */
+ si_meminfo(&si);
+ phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
+ (1 << 30) - 1) >> 30;
+ vm_size = roundup_pow_of_two(
+ min(max(phys_ram_gb * 3, min_vm_size), max_size));
}
adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 67a15d439ac0..9fa9df0c5e7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -321,7 +321,7 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
uint32_t fragment_size_default, unsigned max_level,
unsigned max_bits);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5cd45210113f..5a9534a82d40 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5664,6 +5664,11 @@ static int gfx_v8_0_set_powergating_state(void *handle,
if (amdgpu_sriov_vf(adev))
return 0;
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GFX_DMG))
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
@@ -5713,7 +5718,11 @@ static int gfx_v8_0_set_powergating_state(void *handle,
default:
break;
}
-
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GFX_DMG))
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 75317f283c69..ad151fefa41f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -632,12 +632,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
amdgpu_gart_table_vram_unpin(adev);
}
-static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
u32 status, u32 addr, u32 mc_client)
{
@@ -935,8 +929,9 @@ static int gmc_v6_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- gmc_v6_0_gart_fini(adev);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
release_firmware(adev->gmc.fw);
adev->gmc.fw = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 36dc367c4b45..f8d8a3a73e42 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -747,19 +747,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
}
/**
- * gmc_v7_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
-/**
* gmc_v7_0_vm_decode_fault - print human readable fault info
*
* @adev: amdgpu_device pointer
@@ -1095,8 +1082,9 @@ static int gmc_v7_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
- gmc_v7_0_gart_fini(adev);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
release_firmware(adev->gmc.fw);
adev->gmc.fw = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 70fc97b59b4f..9333109b210d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -969,19 +969,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
}
/**
- * gmc_v8_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
-/**
* gmc_v8_0_vm_decode_fault - print human readable fault info
*
* @adev: amdgpu_device pointer
@@ -1199,8 +1186,9 @@ static int gmc_v8_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
- gmc_v8_0_gart_fini(adev);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
release_firmware(adev->gmc.fw);
adev->gmc.fw = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 399a5db27649..72f8018fa2a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -942,26 +942,12 @@ static int gmc_v9_0_sw_init(void *handle)
return 0;
}
-/**
- * gmc_v9_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v9_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
static int gmc_v9_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- gmc_v9_0_gart_fini(adev);
/*
* TODO:
@@ -974,7 +960,9 @@ static int gmc_v9_0_sw_fini(void *handle)
*/
amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 3f57f6463dc8..cb79a93c2eb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -65,8 +65,6 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev,
int min_temp, int max_temp);
static int kv_init_fps_limits(struct amdgpu_device *adev);
-static void kv_dpm_powergate_uvd(void *handle, bool gate);
-static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate);
static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate);
static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate);
@@ -1354,8 +1352,6 @@ static int kv_dpm_enable(struct amdgpu_device *adev)
return ret;
}
- kv_update_current_ps(adev, adev->pm.dpm.boot_ps);
-
if (adev->irq.installed &&
amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) {
ret = kv_set_thermal_temperature_range(adev, KV_TEMP_RANGE_MIN, KV_TEMP_RANGE_MAX);
@@ -1374,6 +1370,8 @@ static int kv_dpm_enable(struct amdgpu_device *adev)
static void kv_dpm_disable(struct amdgpu_device *adev)
{
+ struct kv_power_info *pi = kv_get_pi(adev);
+
amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
@@ -1387,8 +1385,10 @@ static void kv_dpm_disable(struct amdgpu_device *adev)
/* powerup blocks */
kv_dpm_powergate_acp(adev, false);
kv_dpm_powergate_samu(adev, false);
- kv_dpm_powergate_vce(adev, false);
- kv_dpm_powergate_uvd(adev, false);
+ if (pi->caps_vce_pg) /* power on the VCE block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
+ if (pi->caps_uvd_pg) /* power on the UVD block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_UVDPowerON);
kv_enable_smc_cac(adev, false);
kv_enable_didt(adev, false);
@@ -1551,7 +1551,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev,
int ret;
if (amdgpu_new_state->evclk > 0 && amdgpu_current_state->evclk == 0) {
- kv_dpm_powergate_vce(adev, false);
if (pi->caps_stable_p_state)
pi->vce_boot_level = table->count - 1;
else
@@ -1573,7 +1572,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev,
kv_enable_vce_dpm(adev, true);
} else if (amdgpu_new_state->evclk == 0 && amdgpu_current_state->evclk > 0) {
kv_enable_vce_dpm(adev, false);
- kv_dpm_powergate_vce(adev, true);
}
return 0;
@@ -1702,24 +1700,32 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate)
}
}
-static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
+static void kv_dpm_powergate_vce(void *handle, bool gate)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct kv_power_info *pi = kv_get_pi(adev);
-
- if (pi->vce_power_gated == gate)
- return;
+ int ret;
pi->vce_power_gated = gate;
- if (!pi->caps_vce_pg)
- return;
-
- if (gate)
- amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF);
- else
- amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
+ if (gate) {
+ /* stop the VCE block */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ kv_enable_vce_dpm(adev, false);
+ if (pi->caps_vce_pg) /* power off the VCE block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF);
+ } else {
+ if (pi->caps_vce_pg) /* power on the VCE block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
+ kv_enable_vce_dpm(adev, true);
+ /* re-init the VCE block */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_UNGATE);
+ }
}
+
static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate)
{
struct kv_power_info *pi = kv_get_pi(adev);
@@ -3061,7 +3067,7 @@ static int kv_dpm_hw_init(void *handle)
else
adev->pm.dpm_enabled = true;
mutex_unlock(&adev->pm.mutex);
-
+ amdgpu_pm_compute_clocks(adev);
return ret;
}
@@ -3313,6 +3319,9 @@ static int kv_set_powergating_by_smu(void *handle,
case AMD_IP_BLOCK_TYPE_UVD:
kv_dpm_powergate_uvd(handle, gate);
break;
+ case AMD_IP_BLOCK_TYPE_VCE:
+ kv_dpm_powergate_vce(handle, gate);
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index db327b412562..1de96995e690 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -6887,7 +6887,6 @@ static int si_dpm_enable(struct amdgpu_device *adev)
si_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
si_thermal_start_thermal_controller(adev);
- ni_update_current_ps(adev, boot_ps);
return 0;
}
@@ -7763,7 +7762,7 @@ static int si_dpm_hw_init(void *handle)
else
adev->pm.dpm_enabled = true;
mutex_unlock(&adev->pm.mutex);
-
+ amdgpu_pm_compute_clocks(adev);
return ret;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index fbe878ae1e8c..4ba0003a9d32 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -480,12 +480,20 @@ void pp_rv_set_display_requirement(struct pp_smu *pp,
{
struct dc_context *ctx = pp->ctx;
struct amdgpu_device *adev = ctx->driver_context;
+ void *pp_handle = adev->powerplay.pp_handle;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ struct pp_display_clock_request clock = {0};
- if (!pp_funcs || !pp_funcs->display_configuration_changed)
+ if (!pp_funcs || !pp_funcs->display_clock_voltage_request)
return;
- amdgpu_dpm_display_configuration_changed(adev);
+ clock.clock_type = amd_pp_dcf_clock;
+ clock.clock_freq_in_khz = req->hard_min_dcefclk_khz;
+ pp_funcs->display_clock_voltage_request(pp_handle, &clock);
+
+ clock.clock_type = amd_pp_f_clock;
+ clock.clock_freq_in_khz = req->hard_min_fclk_khz;
+ pp_funcs->display_clock_voltage_request(pp_handle, &clock);
}
void pp_rv_set_wm_ranges(struct pp_smu *pp,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 567867915d32..37eaf72ace54 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -754,8 +754,12 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
* fail-safe mode
*/
if (dc_is_hdmi_signal(link->connector_signal) ||
- dc_is_dvi_signal(link->connector_signal))
+ dc_is_dvi_signal(link->connector_signal)) {
+ if (prev_sink != NULL)
+ dc_sink_release(prev_sink);
+
return false;
+ }
default:
break;
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 11d834f94220..98358b4b36de 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -199,7 +199,6 @@ vma_create(struct drm_i915_gem_object *obj,
vma->flags |= I915_VMA_GGTT;
list_add(&vma->obj_link, &obj->vma_list);
} else {
- i915_ppgtt_get(i915_vm_to_ppgtt(vm));
list_add_tail(&vma->obj_link, &obj->vma_list);
}
@@ -807,9 +806,6 @@ static void __i915_vma_destroy(struct i915_vma *vma)
if (vma->obj)
rb_erase(&vma->obj_node, &vma->obj->vma_tree);
- if (!i915_vma_is_ggtt(vma))
- i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
-
rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
GEM_BUG_ON(i915_gem_active_isset(&iter->base));
kfree(iter);
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index b725835b47ef..769f3f586661 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -962,9 +962,6 @@ void i915_audio_component_init(struct drm_i915_private *dev_priv)
{
int ret;
- if (INTEL_INFO(dev_priv)->num_pipes == 0)
- return;
-
ret = component_add(dev_priv->drm.dev, &i915_audio_component_bind_ops);
if (ret < 0) {
DRM_ERROR("failed to add audio component (%d)\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ed3fa1c8a983..4a3c8ee9a973 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2988,6 +2988,7 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state,
int w = drm_rect_width(&plane_state->base.src) >> 16;
int h = drm_rect_height(&plane_state->base.src) >> 16;
int dst_x = plane_state->base.dst.x1;
+ int dst_w = drm_rect_width(&plane_state->base.dst);
int pipe_src_w = crtc_state->pipe_src_w;
int max_width = skl_max_plane_width(fb, 0, rotation);
int max_height = 4096;
@@ -3009,10 +3010,10 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state,
* screen may cause FIFO underflow and display corruption.
*/
if ((IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) &&
- (dst_x + w < 4 || dst_x > pipe_src_w - 4)) {
+ (dst_x + dst_w < 4 || dst_x > pipe_src_w - 4)) {
DRM_DEBUG_KMS("requested plane X %s position %d invalid (valid range %d-%d)\n",
- dst_x + w < 4 ? "end" : "start",
- dst_x + w < 4 ? dst_x + w : dst_x,
+ dst_x + dst_w < 4 ? "end" : "start",
+ dst_x + dst_w < 4 ? dst_x + dst_w : dst_x,
4, pipe_src_w - 4);
return -ERANGE;
}
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index a9076402dcb0..192972a7d287 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -943,8 +943,12 @@ static int intel_hdmi_hdcp_write(struct intel_digital_port *intel_dig_port,
ret = i2c_transfer(adapter, &msg, 1);
if (ret == 1)
- return 0;
- return ret >= 0 ? -EIO : ret;
+ ret = 0;
+ else if (ret >= 0)
+ ret = -EIO;
+
+ kfree(write_buf);
+ return ret;
}
static
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index 5dae16ccd9f1..3e085c5f2b81 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -74,7 +74,7 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon,
DRM_DEBUG_KMS("Waiting for LSPCON mode %s to settle\n",
lspcon_mode_name(mode));
- wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 100);
+ wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 400);
if (current_mode != mode)
DRM_ERROR("LSPCON mode hasn't settled\n");
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index 978782a77629..28d191192945 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -132,6 +132,11 @@ static void mtk_ovl_config(struct mtk_ddp_comp *comp, unsigned int w,
writel(0x0, comp->regs + DISP_REG_OVL_RST);
}
+static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp)
+{
+ return 4;
+}
+
static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx)
{
unsigned int reg;
@@ -157,6 +162,11 @@ static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx)
static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt)
{
+ /* The return value in switch "MEM_MODE_INPUT_FORMAT_XXX"
+ * is defined in mediatek HW data sheet.
+ * The alphabet order in XXX is no relation to data
+ * arrangement in memory.
+ */
switch (fmt) {
default:
case DRM_FORMAT_RGB565:
@@ -221,6 +231,7 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = {
.stop = mtk_ovl_stop,
.enable_vblank = mtk_ovl_enable_vblank,
.disable_vblank = mtk_ovl_disable_vblank,
+ .layer_nr = mtk_ovl_layer_nr,
.layer_on = mtk_ovl_layer_on,
.layer_off = mtk_ovl_layer_off,
.layer_config = mtk_ovl_layer_config,
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
index 585943c81e1f..b0a5cffe345a 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
@@ -31,14 +31,31 @@
#define RDMA_REG_UPDATE_INT BIT(0)
#define DISP_REG_RDMA_GLOBAL_CON 0x0010
#define RDMA_ENGINE_EN BIT(0)
+#define RDMA_MODE_MEMORY BIT(1)
#define DISP_REG_RDMA_SIZE_CON_0 0x0014
+#define RDMA_MATRIX_ENABLE BIT(17)
+#define RDMA_MATRIX_INT_MTX_SEL GENMASK(23, 20)
+#define RDMA_MATRIX_INT_MTX_BT601_to_RGB (6 << 20)
#define DISP_REG_RDMA_SIZE_CON_1 0x0018
#define DISP_REG_RDMA_TARGET_LINE 0x001c
+#define DISP_RDMA_MEM_CON 0x0024
+#define MEM_MODE_INPUT_FORMAT_RGB565 (0x000 << 4)
+#define MEM_MODE_INPUT_FORMAT_RGB888 (0x001 << 4)
+#define MEM_MODE_INPUT_FORMAT_RGBA8888 (0x002 << 4)
+#define MEM_MODE_INPUT_FORMAT_ARGB8888 (0x003 << 4)
+#define MEM_MODE_INPUT_FORMAT_UYVY (0x004 << 4)
+#define MEM_MODE_INPUT_FORMAT_YUYV (0x005 << 4)
+#define MEM_MODE_INPUT_SWAP BIT(8)
+#define DISP_RDMA_MEM_SRC_PITCH 0x002c
+#define DISP_RDMA_MEM_GMC_SETTING_0 0x0030
#define DISP_REG_RDMA_FIFO_CON 0x0040
#define RDMA_FIFO_UNDERFLOW_EN BIT(31)
#define RDMA_FIFO_PSEUDO_SIZE(bytes) (((bytes) / 16) << 16)
#define RDMA_OUTPUT_VALID_FIFO_THRESHOLD(bytes) ((bytes) / 16)
#define RDMA_FIFO_SIZE(rdma) ((rdma)->data->fifo_size)
+#define DISP_RDMA_MEM_START_ADDR 0x0f00
+
+#define RDMA_MEM_GMC 0x40402020
struct mtk_disp_rdma_data {
unsigned int fifo_size;
@@ -138,12 +155,87 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width,
writel(reg, comp->regs + DISP_REG_RDMA_FIFO_CON);
}
+static unsigned int rdma_fmt_convert(struct mtk_disp_rdma *rdma,
+ unsigned int fmt)
+{
+ /* The return value in switch "MEM_MODE_INPUT_FORMAT_XXX"
+ * is defined in mediatek HW data sheet.
+ * The alphabet order in XXX is no relation to data
+ * arrangement in memory.
+ */
+ switch (fmt) {
+ default:
+ case DRM_FORMAT_RGB565:
+ return MEM_MODE_INPUT_FORMAT_RGB565;
+ case DRM_FORMAT_BGR565:
+ return MEM_MODE_INPUT_FORMAT_RGB565 | MEM_MODE_INPUT_SWAP;
+ case DRM_FORMAT_RGB888:
+ return MEM_MODE_INPUT_FORMAT_RGB888;
+ case DRM_FORMAT_BGR888:
+ return MEM_MODE_INPUT_FORMAT_RGB888 | MEM_MODE_INPUT_SWAP;
+ case DRM_FORMAT_RGBX8888:
+ case DRM_FORMAT_RGBA8888:
+ return MEM_MODE_INPUT_FORMAT_ARGB8888;
+ case DRM_FORMAT_BGRX8888:
+ case DRM_FORMAT_BGRA8888:
+ return MEM_MODE_INPUT_FORMAT_ARGB8888 | MEM_MODE_INPUT_SWAP;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ return MEM_MODE_INPUT_FORMAT_RGBA8888;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ return MEM_MODE_INPUT_FORMAT_RGBA8888 | MEM_MODE_INPUT_SWAP;
+ case DRM_FORMAT_UYVY:
+ return MEM_MODE_INPUT_FORMAT_UYVY;
+ case DRM_FORMAT_YUYV:
+ return MEM_MODE_INPUT_FORMAT_YUYV;
+ }
+}
+
+static unsigned int mtk_rdma_layer_nr(struct mtk_ddp_comp *comp)
+{
+ return 1;
+}
+
+static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
+ struct mtk_plane_state *state)
+{
+ struct mtk_disp_rdma *rdma = comp_to_rdma(comp);
+ struct mtk_plane_pending_state *pending = &state->pending;
+ unsigned int addr = pending->addr;
+ unsigned int pitch = pending->pitch & 0xffff;
+ unsigned int fmt = pending->format;
+ unsigned int con;
+
+ con = rdma_fmt_convert(rdma, fmt);
+ writel_relaxed(con, comp->regs + DISP_RDMA_MEM_CON);
+
+ if (fmt == DRM_FORMAT_UYVY || fmt == DRM_FORMAT_YUYV) {
+ rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0,
+ RDMA_MATRIX_ENABLE, RDMA_MATRIX_ENABLE);
+ rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0,
+ RDMA_MATRIX_INT_MTX_SEL,
+ RDMA_MATRIX_INT_MTX_BT601_to_RGB);
+ } else {
+ rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0,
+ RDMA_MATRIX_ENABLE, 0);
+ }
+
+ writel_relaxed(addr, comp->regs + DISP_RDMA_MEM_START_ADDR);
+ writel_relaxed(pitch, comp->regs + DISP_RDMA_MEM_SRC_PITCH);
+ writel(RDMA_MEM_GMC, comp->regs + DISP_RDMA_MEM_GMC_SETTING_0);
+ rdma_update_bits(comp, DISP_REG_RDMA_GLOBAL_CON,
+ RDMA_MODE_MEMORY, RDMA_MODE_MEMORY);
+}
+
static const struct mtk_ddp_comp_funcs mtk_disp_rdma_funcs = {
.config = mtk_rdma_config,
.start = mtk_rdma_start,
.stop = mtk_rdma_stop,
.enable_vblank = mtk_rdma_enable_vblank,
.disable_vblank = mtk_rdma_disable_vblank,
+ .layer_nr = mtk_rdma_layer_nr,
+ .layer_config = mtk_rdma_layer_config,
};
static int mtk_disp_rdma_bind(struct device *dev, struct device *master,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 2d6aa150a9ff..0b976dfd04df 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -45,7 +45,8 @@ struct mtk_drm_crtc {
bool pending_needs_vblank;
struct drm_pending_vblank_event *event;
- struct drm_plane planes[OVL_LAYER_NR];
+ struct drm_plane *planes;
+ unsigned int layer_nr;
bool pending_planes;
void __iomem *config_regs;
@@ -171,9 +172,9 @@ static void mtk_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc)
{
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
- struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0];
+ struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
- mtk_ddp_comp_enable_vblank(ovl, &mtk_crtc->base);
+ mtk_ddp_comp_enable_vblank(comp, &mtk_crtc->base);
return 0;
}
@@ -181,9 +182,9 @@ static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc)
static void mtk_drm_crtc_disable_vblank(struct drm_crtc *crtc)
{
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
- struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0];
+ struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
- mtk_ddp_comp_disable_vblank(ovl);
+ mtk_ddp_comp_disable_vblank(comp);
}
static int mtk_crtc_ddp_clk_enable(struct mtk_drm_crtc *mtk_crtc)
@@ -286,7 +287,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
}
/* Initially configure all planes */
- for (i = 0; i < OVL_LAYER_NR; i++) {
+ for (i = 0; i < mtk_crtc->layer_nr; i++) {
struct drm_plane *plane = &mtk_crtc->planes[i];
struct mtk_plane_state *plane_state;
@@ -334,7 +335,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
{
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state);
- struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0];
+ struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
unsigned int i;
/*
@@ -343,7 +344,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
* queue update module registers on vblank.
*/
if (state->pending_config) {
- mtk_ddp_comp_config(ovl, state->pending_width,
+ mtk_ddp_comp_config(comp, state->pending_width,
state->pending_height,
state->pending_vrefresh, 0);
@@ -351,14 +352,14 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
}
if (mtk_crtc->pending_planes) {
- for (i = 0; i < OVL_LAYER_NR; i++) {
+ for (i = 0; i < mtk_crtc->layer_nr; i++) {
struct drm_plane *plane = &mtk_crtc->planes[i];
struct mtk_plane_state *plane_state;
plane_state = to_mtk_plane_state(plane->state);
if (plane_state->pending.config) {
- mtk_ddp_comp_layer_config(ovl, i, plane_state);
+ mtk_ddp_comp_layer_config(comp, i, plane_state);
plane_state->pending.config = false;
}
}
@@ -370,12 +371,12 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
- struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0];
+ struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
int ret;
DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
- ret = mtk_smi_larb_get(ovl->larb_dev);
+ ret = mtk_smi_larb_get(comp->larb_dev);
if (ret) {
DRM_ERROR("Failed to get larb: %d\n", ret);
return;
@@ -383,7 +384,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
ret = mtk_crtc_ddp_hw_init(mtk_crtc);
if (ret) {
- mtk_smi_larb_put(ovl->larb_dev);
+ mtk_smi_larb_put(comp->larb_dev);
return;
}
@@ -395,7 +396,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
- struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0];
+ struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
int i;
DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
@@ -403,7 +404,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
return;
/* Set all pending plane state to disabled */
- for (i = 0; i < OVL_LAYER_NR; i++) {
+ for (i = 0; i < mtk_crtc->layer_nr; i++) {
struct drm_plane *plane = &mtk_crtc->planes[i];
struct mtk_plane_state *plane_state;
@@ -418,7 +419,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
drm_crtc_vblank_off(crtc);
mtk_crtc_ddp_hw_fini(mtk_crtc);
- mtk_smi_larb_put(ovl->larb_dev);
+ mtk_smi_larb_put(comp->larb_dev);
mtk_crtc->enabled = false;
}
@@ -450,7 +451,7 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc,
if (mtk_crtc->event)
mtk_crtc->pending_needs_vblank = true;
- for (i = 0; i < OVL_LAYER_NR; i++) {
+ for (i = 0; i < mtk_crtc->layer_nr; i++) {
struct drm_plane *plane = &mtk_crtc->planes[i];
struct mtk_plane_state *plane_state;
@@ -516,7 +517,7 @@ err_cleanup_crtc:
return ret;
}
-void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl)
+void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp)
{
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
struct mtk_drm_private *priv = crtc->dev->dev_private;
@@ -598,7 +599,12 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
mtk_crtc->ddp_comp[i] = comp;
}
- for (zpos = 0; zpos < OVL_LAYER_NR; zpos++) {
+ mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]);
+ mtk_crtc->planes = devm_kzalloc(dev, mtk_crtc->layer_nr *
+ sizeof(struct drm_plane),
+ GFP_KERNEL);
+
+ for (zpos = 0; zpos < mtk_crtc->layer_nr; zpos++) {
type = (zpos == 0) ? DRM_PLANE_TYPE_PRIMARY :
(zpos == 1) ? DRM_PLANE_TYPE_CURSOR :
DRM_PLANE_TYPE_OVERLAY;
@@ -609,7 +615,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
}
ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0],
- &mtk_crtc->planes[1], pipe);
+ mtk_crtc->layer_nr > 1 ? &mtk_crtc->planes[1] :
+ NULL, pipe);
if (ret < 0)
goto unprepare;
drm_mode_crtc_set_gamma_size(&mtk_crtc->base, MTK_LUT_SIZE);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
index 9d9410c67ae9..091adb2087eb 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
@@ -18,13 +18,12 @@
#include "mtk_drm_ddp_comp.h"
#include "mtk_drm_plane.h"
-#define OVL_LAYER_NR 4
#define MTK_LUT_SIZE 512
#define MTK_MAX_BPC 10
#define MTK_MIN_BPC 3
void mtk_drm_crtc_commit(struct drm_crtc *crtc);
-void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl);
+void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp);
int mtk_drm_crtc_create(struct drm_device *drm_dev,
const enum mtk_ddp_comp_id *path,
unsigned int path_len);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
index 87e4191c250e..546b3e3b300b 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
@@ -106,6 +106,8 @@
#define OVL1_MOUT_EN_COLOR1 0x1
#define GAMMA_MOUT_EN_RDMA1 0x1
#define RDMA0_SOUT_DPI0 0x2
+#define RDMA0_SOUT_DPI1 0x3
+#define RDMA0_SOUT_DSI1 0x1
#define RDMA0_SOUT_DSI2 0x4
#define RDMA0_SOUT_DSI3 0x5
#define RDMA1_SOUT_DPI0 0x2
@@ -122,6 +124,8 @@
#define DPI0_SEL_IN_RDMA2 0x3
#define DPI1_SEL_IN_RDMA1 (0x1 << 8)
#define DPI1_SEL_IN_RDMA2 (0x3 << 8)
+#define DSI0_SEL_IN_RDMA1 0x1
+#define DSI0_SEL_IN_RDMA2 0x4
#define DSI1_SEL_IN_RDMA1 0x1
#define DSI1_SEL_IN_RDMA2 0x4
#define DSI2_SEL_IN_RDMA1 (0x1 << 16)
@@ -224,6 +228,12 @@ static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id cur,
} else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DPI0) {
*addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN;
value = RDMA0_SOUT_DPI0;
+ } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DPI1) {
+ *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN;
+ value = RDMA0_SOUT_DPI1;
+ } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DSI1) {
+ *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN;
+ value = RDMA0_SOUT_DSI1;
} else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DSI2) {
*addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN;
value = RDMA0_SOUT_DSI2;
@@ -282,6 +292,9 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur,
} else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DPI1) {
*addr = DISP_REG_CONFIG_DPI_SEL_IN;
value = DPI1_SEL_IN_RDMA1;
+ } else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DSI0) {
+ *addr = DISP_REG_CONFIG_DSIE_SEL_IN;
+ value = DSI0_SEL_IN_RDMA1;
} else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DSI1) {
*addr = DISP_REG_CONFIG_DSIO_SEL_IN;
value = DSI1_SEL_IN_RDMA1;
@@ -297,8 +310,11 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur,
} else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DPI1) {
*addr = DISP_REG_CONFIG_DPI_SEL_IN;
value = DPI1_SEL_IN_RDMA2;
- } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI1) {
+ } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI0) {
*addr = DISP_REG_CONFIG_DSIE_SEL_IN;
+ value = DSI0_SEL_IN_RDMA2;
+ } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI1) {
+ *addr = DISP_REG_CONFIG_DSIO_SEL_IN;
value = DSI1_SEL_IN_RDMA2;
} else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI2) {
*addr = DISP_REG_CONFIG_DSIE_SEL_IN;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
index 7413ffeb3c9d..8399229e6ad2 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
@@ -78,6 +78,7 @@ struct mtk_ddp_comp_funcs {
void (*stop)(struct mtk_ddp_comp *comp);
void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc);
void (*disable_vblank)(struct mtk_ddp_comp *comp);
+ unsigned int (*layer_nr)(struct mtk_ddp_comp *comp);
void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx);
void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx);
void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx,
@@ -128,6 +129,14 @@ static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp)
comp->funcs->disable_vblank(comp);
}
+static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp)
+{
+ if (comp->funcs && comp->funcs->layer_nr)
+ return comp->funcs->layer_nr(comp);
+
+ return 0;
+}
+
static inline void mtk_ddp_comp_layer_on(struct mtk_ddp_comp *comp,
unsigned int idx)
{
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 39721119713b..47ec604289b7 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -381,7 +381,7 @@ static int mtk_drm_bind(struct device *dev)
err_deinit:
mtk_drm_kms_deinit(drm);
err_free:
- drm_dev_unref(drm);
+ drm_dev_put(drm);
return ret;
}
@@ -390,7 +390,7 @@ static void mtk_drm_unbind(struct device *dev)
struct mtk_drm_private *private = dev_get_drvdata(dev);
drm_dev_unregister(private->drm);
- drm_dev_unref(private->drm);
+ drm_dev_put(private->drm);
private->drm = NULL;
}
@@ -564,7 +564,7 @@ static int mtk_drm_remove(struct platform_device *pdev)
drm_dev_unregister(drm);
mtk_drm_kms_deinit(drm);
- drm_dev_unref(drm);
+ drm_dev_put(drm);
component_master_del(&pdev->dev, &mtk_drm_ops);
pm_runtime_disable(&pdev->dev);
@@ -580,29 +580,24 @@ static int mtk_drm_sys_suspend(struct device *dev)
{
struct mtk_drm_private *private = dev_get_drvdata(dev);
struct drm_device *drm = private->drm;
+ int ret;
- drm_kms_helper_poll_disable(drm);
-
- private->suspend_state = drm_atomic_helper_suspend(drm);
- if (IS_ERR(private->suspend_state)) {
- drm_kms_helper_poll_enable(drm);
- return PTR_ERR(private->suspend_state);
- }
-
+ ret = drm_mode_config_helper_suspend(drm);
DRM_DEBUG_DRIVER("mtk_drm_sys_suspend\n");
- return 0;
+
+ return ret;
}
static int mtk_drm_sys_resume(struct device *dev)
{
struct mtk_drm_private *private = dev_get_drvdata(dev);
struct drm_device *drm = private->drm;
+ int ret;
- drm_atomic_helper_resume(drm, private->suspend_state);
- drm_kms_helper_poll_enable(drm);
-
+ ret = drm_mode_config_helper_resume(drm);
DRM_DEBUG_DRIVER("mtk_drm_sys_resume\n");
- return 0;
+
+ return ret;
}
#endif
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 90837f7c7d0f..f4c7516eb989 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -302,14 +302,18 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn)
return clamp_val(reg, 0, 1023) & (0xff << 2);
}
-static u16 adt7475_read_word(struct i2c_client *client, int reg)
+static int adt7475_read_word(struct i2c_client *client, int reg)
{
- u16 val;
+ int val1, val2;
- val = i2c_smbus_read_byte_data(client, reg);
- val |= (i2c_smbus_read_byte_data(client, reg + 1) << 8);
+ val1 = i2c_smbus_read_byte_data(client, reg);
+ if (val1 < 0)
+ return val1;
+ val2 = i2c_smbus_read_byte_data(client, reg + 1);
+ if (val2 < 0)
+ return val2;
- return val;
+ return val1 | (val2 << 8);
}
static void adt7475_write_word(struct i2c_client *client, int reg, u16 val)
@@ -962,13 +966,14 @@ static ssize_t show_pwmfreq(struct device *dev, struct device_attribute *attr,
{
struct adt7475_data *data = adt7475_update_device(dev);
struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
- int i = clamp_val(data->range[sattr->index] & 0xf, 0,
- ARRAY_SIZE(pwmfreq_table) - 1);
+ int idx;
if (IS_ERR(data))
return PTR_ERR(data);
+ idx = clamp_val(data->range[sattr->index] & 0xf, 0,
+ ARRAY_SIZE(pwmfreq_table) - 1);
- return sprintf(buf, "%d\n", pwmfreq_table[i]);
+ return sprintf(buf, "%d\n", pwmfreq_table[idx]);
}
static ssize_t set_pwmfreq(struct device *dev, struct device_attribute *attr,
@@ -1004,6 +1009,10 @@ static ssize_t pwm_use_point2_pwm_at_crit_show(struct device *dev,
char *buf)
{
struct adt7475_data *data = adt7475_update_device(dev);
+
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
return sprintf(buf, "%d\n", !!(data->config4 & CONFIG4_MAXDUTY));
}
diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index e9e6aeabbf84..71d3445ba869 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -17,7 +17,7 @@
* Bi-directional Current/Power Monitor with I2C Interface
* Datasheet: http://www.ti.com/product/ina230
*
- * Copyright (C) 2012 Lothar Felten <l-felten@ti.com>
+ * Copyright (C) 2012 Lothar Felten <lothar.felten@gmail.com>
* Thanks to Jan Volkering
*
* This program is free software; you can redistribute it and/or modify
@@ -329,6 +329,15 @@ static int ina2xx_set_shunt(struct ina2xx_data *data, long val)
return 0;
}
+static ssize_t ina2xx_show_shunt(struct device *dev,
+ struct device_attribute *da,
+ char *buf)
+{
+ struct ina2xx_data *data = dev_get_drvdata(dev);
+
+ return snprintf(buf, PAGE_SIZE, "%li\n", data->rshunt);
+}
+
static ssize_t ina2xx_store_shunt(struct device *dev,
struct device_attribute *da,
const char *buf, size_t count)
@@ -403,7 +412,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL,
/* shunt resistance */
static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR,
- ina2xx_show_value, ina2xx_store_shunt,
+ ina2xx_show_shunt, ina2xx_store_shunt,
INA2XX_CALIBRATION);
/* update interval (ina226 only) */
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index c6bd61e4695a..944f5b63aecd 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -63,6 +63,7 @@
#include <linux/bitops.h>
#include <linux/dmi.h>
#include <linux/io.h>
+#include <linux/nospec.h>
#include "lm75.h"
#define USE_ALTERNATE
@@ -2689,6 +2690,7 @@ store_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr,
return err;
if (val > NUM_TEMP)
return -EINVAL;
+ val = array_index_nospec(val, NUM_TEMP + 1);
if (val && (!(data->have_temp & BIT(val - 1)) ||
!data->temp_src[val - 1]))
return -EINVAL;
diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index 6ec65adaba49..c33dcfb87993 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -110,8 +110,8 @@ static int sclhi(struct i2c_algo_bit_data *adap)
}
#ifdef DEBUG
if (jiffies != start && i2c_debug >= 3)
- pr_debug("i2c-algo-bit: needed %ld jiffies for SCL to go "
- "high\n", jiffies - start);
+ pr_debug("i2c-algo-bit: needed %ld jiffies for SCL to go high\n",
+ jiffies - start);
#endif
done:
@@ -171,8 +171,9 @@ static int i2c_outb(struct i2c_adapter *i2c_adap, unsigned char c)
setsda(adap, sb);
udelay((adap->udelay + 1) / 2);
if (sclhi(adap) < 0) { /* timed out */
- bit_dbg(1, &i2c_adap->dev, "i2c_outb: 0x%02x, "
- "timeout at bit #%d\n", (int)c, i);
+ bit_dbg(1, &i2c_adap->dev,
+ "i2c_outb: 0x%02x, timeout at bit #%d\n",
+ (int)c, i);
return -ETIMEDOUT;
}
/* FIXME do arbitration here:
@@ -185,8 +186,8 @@ static int i2c_outb(struct i2c_adapter *i2c_adap, unsigned char c)
}
sdahi(adap);
if (sclhi(adap) < 0) { /* timeout */
- bit_dbg(1, &i2c_adap->dev, "i2c_outb: 0x%02x, "
- "timeout at ack\n", (int)c);
+ bit_dbg(1, &i2c_adap->dev,
+ "i2c_outb: 0x%02x, timeout at ack\n", (int)c);
return -ETIMEDOUT;
}
@@ -215,8 +216,9 @@ static int i2c_inb(struct i2c_adapter *i2c_adap)
sdahi(adap);
for (i = 0; i < 8; i++) {
if (sclhi(adap) < 0) { /* timeout */
- bit_dbg(1, &i2c_adap->dev, "i2c_inb: timeout at bit "
- "#%d\n", 7 - i);
+ bit_dbg(1, &i2c_adap->dev,
+ "i2c_inb: timeout at bit #%d\n",
+ 7 - i);
return -ETIMEDOUT;
}
indata *= 2;
@@ -265,8 +267,9 @@ static int test_bus(struct i2c_adapter *i2c_adap)
goto bailout;
}
if (!scl) {
- printk(KERN_WARNING "%s: SCL unexpected low "
- "while pulling SDA low!\n", name);
+ printk(KERN_WARNING
+ "%s: SCL unexpected low while pulling SDA low!\n",
+ name);
goto bailout;
}
@@ -278,8 +281,9 @@ static int test_bus(struct i2c_adapter *i2c_adap)
goto bailout;
}
if (!scl) {
- printk(KERN_WARNING "%s: SCL unexpected low "
- "while pulling SDA high!\n", name);
+ printk(KERN_WARNING
+ "%s: SCL unexpected low while pulling SDA high!\n",
+ name);
goto bailout;
}
@@ -291,8 +295,9 @@ static int test_bus(struct i2c_adapter *i2c_adap)
goto bailout;
}
if (!sda) {
- printk(KERN_WARNING "%s: SDA unexpected low "
- "while pulling SCL low!\n", name);
+ printk(KERN_WARNING
+ "%s: SDA unexpected low while pulling SCL low!\n",
+ name);
goto bailout;
}
@@ -304,8 +309,9 @@ static int test_bus(struct i2c_adapter *i2c_adap)
goto bailout;
}
if (!sda) {
- printk(KERN_WARNING "%s: SDA unexpected low "
- "while pulling SCL high!\n", name);
+ printk(KERN_WARNING
+ "%s: SDA unexpected low while pulling SCL high!\n",
+ name);
goto bailout;
}
@@ -352,8 +358,8 @@ static int try_address(struct i2c_adapter *i2c_adap,
i2c_start(adap);
}
if (i && ret)
- bit_dbg(1, &i2c_adap->dev, "Used %d tries to %s client at "
- "0x%02x: %s\n", i + 1,
+ bit_dbg(1, &i2c_adap->dev,
+ "Used %d tries to %s client at 0x%02x: %s\n", i + 1,
addr & 1 ? "read from" : "write to", addr >> 1,
ret == 1 ? "success" : "failed, timeout?");
return ret;
@@ -442,8 +448,9 @@ static int readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msg)
if (inval <= 0 || inval > I2C_SMBUS_BLOCK_MAX) {
if (!(flags & I2C_M_NO_RD_ACK))
acknak(i2c_adap, 0);
- dev_err(&i2c_adap->dev, "readbytes: invalid "
- "block length (%d)\n", inval);
+ dev_err(&i2c_adap->dev,
+ "readbytes: invalid block length (%d)\n",
+ inval);
return -EPROTO;
}
/* The original count value accounts for the extra
@@ -506,8 +513,8 @@ static int bit_doAddress(struct i2c_adapter *i2c_adap, struct i2c_msg *msg)
return -ENXIO;
}
if (flags & I2C_M_RD) {
- bit_dbg(3, &i2c_adap->dev, "emitting repeated "
- "start condition\n");
+ bit_dbg(3, &i2c_adap->dev,
+ "emitting repeated start condition\n");
i2c_repstart(adap);
/* okay, now switch into reading mode */
addr |= 0x01;
@@ -564,8 +571,8 @@ static int bit_xfer(struct i2c_adapter *i2c_adap,
}
ret = bit_doAddress(i2c_adap, pmsg);
if ((ret != 0) && !nak_ok) {
- bit_dbg(1, &i2c_adap->dev, "NAK from "
- "device addr 0x%02x msg #%d\n",
+ bit_dbg(1, &i2c_adap->dev,
+ "NAK from device addr 0x%02x msg #%d\n",
msgs[i].addr, i);
goto bailout;
}
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index e18442b9973a..94d94b4a9a0d 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -708,7 +708,6 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
i2c_set_adapdata(adap, dev);
if (dev->pm_disabled) {
- dev_pm_syscore_device(dev->dev, true);
irq_flags = IRQF_NO_SUSPEND;
} else {
irq_flags = IRQF_SHARED | IRQF_COND_SUSPEND;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 1a8d2da5b000..b5750fd85125 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -434,6 +434,9 @@ static int dw_i2c_plat_suspend(struct device *dev)
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
+ if (i_dev->pm_disabled)
+ return 0;
+
i_dev->disable(i_dev);
i2c_dw_prepare_clk(i_dev, false);
@@ -444,7 +447,9 @@ static int dw_i2c_plat_resume(struct device *dev)
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
- i2c_dw_prepare_clk(i_dev, true);
+ if (!i_dev->pm_disabled)
+ i2c_dw_prepare_clk(i_dev, true);
+
i_dev->init(i_dev);
return 0;
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 941c223f6491..04b60a349d7e 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1415,6 +1415,13 @@ static void i801_add_tco(struct i801_priv *priv)
}
#ifdef CONFIG_ACPI
+static bool i801_acpi_is_smbus_ioport(const struct i801_priv *priv,
+ acpi_physical_address address)
+{
+ return address >= priv->smba &&
+ address <= pci_resource_end(priv->pci_dev, SMBBAR);
+}
+
static acpi_status
i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
u64 *value, void *handler_context, void *region_context)
@@ -1430,7 +1437,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
*/
mutex_lock(&priv->acpi_lock);
- if (!priv->acpi_reserved) {
+ if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) {
priv->acpi_reserved = true;
dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 439e8778f849..818cab14e87c 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -507,8 +507,6 @@ static void sh_mobile_i2c_dma_callback(void *data)
pd->pos = pd->msg->len;
pd->stop_after_dma = true;
- i2c_release_dma_safe_msg_buf(pd->msg, pd->dma_buf);
-
iic_set_clr(pd, ICIC, 0, ICIC_TDMAE | ICIC_RDMAE);
}
@@ -602,8 +600,8 @@ static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd)
dma_async_issue_pending(chan);
}
-static int start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg,
- bool do_init)
+static void start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg,
+ bool do_init)
{
if (do_init) {
/* Initialize channel registers */
@@ -627,7 +625,6 @@ static int start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg,
/* Enable all interrupts to begin with */
iic_wr(pd, ICIC, ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE);
- return 0;
}
static int poll_dte(struct sh_mobile_i2c_data *pd)
@@ -698,9 +695,7 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter,
pd->send_stop = i == num - 1 || msg->flags & I2C_M_STOP;
pd->stop_after_dma = false;
- err = start_ch(pd, msg, do_start);
- if (err)
- break;
+ start_ch(pd, msg, do_start);
if (do_start)
i2c_op(pd, OP_START, 0);
@@ -709,6 +704,10 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter,
timeout = wait_event_timeout(pd->wait,
pd->sr & (ICSR_TACK | SW_DONE),
adapter->timeout);
+
+ /* 'stop_after_dma' tells if DMA transfer was complete */
+ i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, pd->stop_after_dma);
+
if (!timeout) {
dev_err(pd->dev, "Transfer request timed out\n");
if (pd->dma_direction != DMA_NONE)
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index f15737763608..9ee9a15e7134 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -2293,21 +2293,22 @@ u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold)
EXPORT_SYMBOL_GPL(i2c_get_dma_safe_msg_buf);
/**
- * i2c_release_dma_safe_msg_buf - release DMA safe buffer and sync with i2c_msg
- * @msg: the message to be synced with
+ * i2c_put_dma_safe_msg_buf - release DMA safe buffer and sync with i2c_msg
* @buf: the buffer obtained from i2c_get_dma_safe_msg_buf(). May be NULL.
+ * @msg: the message which the buffer corresponds to
+ * @xferred: bool saying if the message was transferred
*/
-void i2c_release_dma_safe_msg_buf(struct i2c_msg *msg, u8 *buf)
+void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred)
{
if (!buf || buf == msg->buf)
return;
- if (msg->flags & I2C_M_RD)
+ if (xferred && msg->flags & I2C_M_RD)
memcpy(msg->buf, buf, msg->len);
kfree(buf);
}
-EXPORT_SYMBOL_GPL(i2c_release_dma_safe_msg_buf);
+EXPORT_SYMBOL_GPL(i2c_put_dma_safe_msg_buf);
MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
MODULE_DESCRIPTION("I2C-Bus main module");
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index abb6660c099c..0a3ec7c726ec 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -26,6 +26,7 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
select ANON_INODES
+ depends on MMU
---help---
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 46b855a42884..0dce94e3c495 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -45,6 +45,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_sa.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
@@ -61,6 +62,7 @@ struct addr_req {
struct rdma_dev_addr *addr, void *context);
unsigned long timeout;
struct delayed_work work;
+ bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */
int status;
u32 seq;
};
@@ -219,60 +221,75 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
}
EXPORT_SYMBOL(rdma_addr_size_kss);
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
- const struct net_device *dev,
- const unsigned char *dst_dev_addr)
+/**
+ * rdma_copy_src_l2_addr - Copy netdevice source addresses
+ * @dev_addr: Destination address pointer where to copy the addresses
+ * @dev: Netdevice whose source addresses to copy
+ *
+ * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
+ * This includes unicast address, broadcast address, device type and
+ * interface index.
+ */
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+ const struct net_device *dev)
{
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
- if (dst_dev_addr)
- memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->bound_dev_if = dev->ifindex;
}
-EXPORT_SYMBOL(rdma_copy_addr);
+EXPORT_SYMBOL(rdma_copy_src_l2_addr);
-int rdma_translate_ip(const struct sockaddr *addr,
- struct rdma_dev_addr *dev_addr)
+static struct net_device *
+rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
{
- struct net_device *dev;
+ struct net_device *dev = NULL;
+ int ret = -EADDRNOTAVAIL;
- if (dev_addr->bound_dev_if) {
- dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (!dev)
- return -ENODEV;
- rdma_copy_addr(dev_addr, dev, NULL);
- dev_put(dev);
- return 0;
- }
-
- switch (addr->sa_family) {
+ switch (src_in->sa_family) {
case AF_INET:
- dev = ip_dev_find(dev_addr->net,
- ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
-
- if (!dev)
- return -EADDRNOTAVAIL;
-
- rdma_copy_addr(dev_addr, dev, NULL);
- dev_put(dev);
+ dev = __ip_dev_find(net,
+ ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
+ false);
+ if (dev)
+ ret = 0;
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- rcu_read_lock();
- for_each_netdev_rcu(dev_addr->net, dev) {
- if (ipv6_chk_addr(dev_addr->net,
- &((const struct sockaddr_in6 *)addr)->sin6_addr,
+ for_each_netdev_rcu(net, dev) {
+ if (ipv6_chk_addr(net,
+ &((const struct sockaddr_in6 *)src_in)->sin6_addr,
dev, 1)) {
- rdma_copy_addr(dev_addr, dev, NULL);
+ ret = 0;
break;
}
}
- rcu_read_unlock();
break;
#endif
}
- return 0;
+ return ret ? ERR_PTR(ret) : dev;
+}
+
+int rdma_translate_ip(const struct sockaddr *addr,
+ struct rdma_dev_addr *dev_addr)
+{
+ struct net_device *dev;
+
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ rdma_copy_src_l2_addr(dev_addr, dev);
+ dev_put(dev);
+ return 0;
+ }
+
+ rcu_read_lock();
+ dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
+ if (!IS_ERR(dev))
+ rdma_copy_src_l2_addr(dev_addr, dev);
+ rcu_read_unlock();
+ return PTR_ERR_OR_ZERO(dev);
}
EXPORT_SYMBOL(rdma_translate_ip);
@@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req)
spin_unlock_bh(&lock);
}
-static int ib_nl_fetch_ha(const struct dst_entry *dst,
- struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
- if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
+ if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
return -EADDRNOTAVAIL;
- /* We fill in what we can, the response will fill the rest */
- rdma_copy_addr(dev_addr, dst->dev, NULL);
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
@@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
- rdma_copy_addr(dev_addr, dst->dev, n->ha);
+ memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
}
neigh_release(n);
@@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
- /* Gateway + ARPHRD_INFINIBAND -> IB router */
- if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
- return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
+ /* If we have a gateway in IB mode then it must be an IB network */
+ if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
+ return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
else
return dst_fetch_ha(dst, dev_addr, daddr);
}
-static int addr4_resolve(struct sockaddr_in *src_in,
- const struct sockaddr_in *dst_in,
+static int addr4_resolve(struct sockaddr *src_sock,
+ const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct rtable **prt)
{
+ struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
+ const struct sockaddr_in *dst_in =
+ (const struct sockaddr_in *)dst_sock;
+
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
struct rtable *rt;
@@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
if (ret)
return ret;
- src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
- /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
- * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
- * type accordingly.
- */
- if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
- addr->network = RDMA_NETWORK_IPV4;
-
addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
*prt = rt;
@@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
}
#if IS_ENABLED(CONFIG_IPV6)
-static int addr6_resolve(struct sockaddr_in6 *src_in,
- const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+ const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
+ struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
+ const struct sockaddr_in6 *dst_in =
+ (const struct sockaddr_in6 *)dst_sock;
struct flowi6 fl6;
struct dst_entry *dst;
- struct rt6_info *rt;
int ret;
memset(&fl6, 0, sizeof fl6);
@@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
if (ret < 0)
return ret;
- rt = (struct rt6_info *)dst;
- if (ipv6_addr_any(&src_in->sin6_addr)) {
- src_in->sin6_family = AF_INET6;
+ if (ipv6_addr_any(&src_in->sin6_addr))
src_in->sin6_addr = fl6.saddr;
- }
-
- /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
- * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
- * type accordingly.
- */
- if (rt->rt6i_flags & RTF_GATEWAY &&
- ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
- addr->network = RDMA_NETWORK_IPV6;
addr->hoplimit = ip6_dst_hoplimit(dst);
@@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
return 0;
}
#else
-static int addr6_resolve(struct sockaddr_in6 *src_in,
- const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+ const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
@@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
+ unsigned int ndev_flags,
u32 seq)
{
- if (dst->dev->flags & IFF_LOOPBACK) {
- int ret;
+ int ret = 0;
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr,
- MAX_ADDR_LEN);
+ if (ndev_flags & IFF_LOOPBACK) {
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ } else {
+ if (!(ndev_flags & IFF_NOARP)) {
+ /* If the device doesn't do ARP internally */
+ ret = fetch_ha(dst, addr, dst_in, seq);
+ }
+ }
+ return ret;
+}
- return ret;
+static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+ const struct sockaddr *dst_in,
+ const struct dst_entry *dst,
+ const struct net_device *ndev)
+{
+ int ret = 0;
+
+ if (dst->dev->flags & IFF_LOOPBACK)
+ ret = rdma_translate_ip(dst_in, dev_addr);
+ else
+ rdma_copy_src_l2_addr(dev_addr, dst->dev);
+
+ /*
+ * If there's a gateway and type of device not ARPHRD_INFINIBAND,
+ * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
+ * network type accordingly.
+ */
+ if (has_gateway(dst, dst_in->sa_family) &&
+ ndev->type != ARPHRD_INFINIBAND)
+ dev_addr->network = dst_in->sa_family == AF_INET ?
+ RDMA_NETWORK_IPV4 :
+ RDMA_NETWORK_IPV6;
+ else
+ dev_addr->network = RDMA_NETWORK_IB;
+
+ return ret;
+}
+
+static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
+ unsigned int *ndev_flags,
+ const struct sockaddr *dst_in,
+ const struct dst_entry *dst)
+{
+ struct net_device *ndev = READ_ONCE(dst->dev);
+
+ *ndev_flags = ndev->flags;
+ /* A physical device must be the RDMA device to use */
+ if (ndev->flags & IFF_LOOPBACK) {
+ /*
+ * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
+ * loopback IP address. So if route is resolved to loopback
+ * interface, translate that to a real ndev based on non
+ * loopback IP address.
+ */
+ ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
+ if (IS_ERR(ndev))
+ return -ENODEV;
}
- /* If the device doesn't do ARP internally */
- if (!(dst->dev->flags & IFF_NOARP))
- return fetch_ha(dst, addr, dst_in, seq);
+ return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
+}
+
+static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
+{
+ struct net_device *ndev;
- rdma_copy_addr(addr, dst->dev, NULL);
+ ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
+ if (IS_ERR(ndev))
+ return PTR_ERR(ndev);
+ /*
+ * Since we are holding the rcu, reading net and ifindex
+ * are safe without any additional reference; because
+ * change_net_namespace() in net/core/dev.c does rcu sync
+ * after it changes the state to IFF_DOWN and before
+ * updating netdev fields {net, ifindex}.
+ */
+ addr->net = dev_net(ndev);
+ addr->bound_dev_if = ndev->ifindex;
return 0;
}
+static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
+{
+ addr->net = &init_net;
+ addr->bound_dev_if = 0;
+}
+
static int addr_resolve(struct sockaddr *src_in,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
bool resolve_neigh,
+ bool resolve_by_gid_attr,
u32 seq)
{
- struct net_device *ndev;
- struct dst_entry *dst;
+ struct dst_entry *dst = NULL;
+ unsigned int ndev_flags = 0;
+ struct rtable *rt = NULL;
int ret;
if (!addr->net) {
@@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in,
return -EINVAL;
}
- if (src_in->sa_family == AF_INET) {
- struct rtable *rt = NULL;
- const struct sockaddr_in *dst_in4 =
- (const struct sockaddr_in *)dst_in;
-
- ret = addr4_resolve((struct sockaddr_in *)src_in,
- dst_in4, addr, &rt);
- if (ret)
- return ret;
-
- if (resolve_neigh)
- ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
-
- if (addr->bound_dev_if) {
- ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
- } else {
- ndev = rt->dst.dev;
- dev_hold(ndev);
+ rcu_read_lock();
+ if (resolve_by_gid_attr) {
+ if (!addr->sgid_attr) {
+ rcu_read_unlock();
+ pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
+ return -EINVAL;
}
-
- ip_rt_put(rt);
- } else {
- const struct sockaddr_in6 *dst_in6 =
- (const struct sockaddr_in6 *)dst_in;
-
- ret = addr6_resolve((struct sockaddr_in6 *)src_in,
- dst_in6, addr,
- &dst);
- if (ret)
+ /*
+ * If the request is for a specific gid attribute of the
+ * rdma_dev_addr, derive net from the netdevice of the
+ * GID attribute.
+ */
+ ret = set_addr_netns_by_gid_rcu(addr);
+ if (ret) {
+ rcu_read_unlock();
return ret;
-
- if (resolve_neigh)
- ret = addr_resolve_neigh(dst, dst_in, addr, seq);
-
- if (addr->bound_dev_if) {
- ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
- } else {
- ndev = dst->dev;
- dev_hold(ndev);
}
-
- dst_release(dst);
}
-
- if (ndev) {
- if (ndev->flags & IFF_LOOPBACK)
- ret = rdma_translate_ip(dst_in, addr);
- else
- addr->bound_dev_if = ndev->ifindex;
- dev_put(ndev);
+ if (src_in->sa_family == AF_INET) {
+ ret = addr4_resolve(src_in, dst_in, addr, &rt);
+ dst = &rt->dst;
+ } else {
+ ret = addr6_resolve(src_in, dst_in, addr, &dst);
}
+ if (ret) {
+ rcu_read_unlock();
+ goto done;
+ }
+ ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
+ rcu_read_unlock();
+
+ /*
+ * Resolve neighbor destination address if requested and
+ * only if src addr translation didn't fail.
+ */
+ if (!ret && resolve_neigh)
+ ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
+ if (src_in->sa_family == AF_INET)
+ ip_rt_put(rt);
+ else
+ dst_release(dst);
+done:
+ /*
+ * Clear the addr net to go back to its original state, only if it was
+ * derived from GID attribute in this context.
+ */
+ if (resolve_by_gid_attr)
+ rdma_addr_set_net_defaults(addr);
return ret;
}
@@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work)
src_in = (struct sockaddr *)&req->src_addr;
dst_in = (struct sockaddr *)&req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr,
- true, req->seq);
+ true, req->resolve_by_gid_attr,
+ req->seq);
if (req->status && time_after_eq(jiffies, req->timeout)) {
req->status = -ETIMEDOUT;
} else if (req->status == -ENODATA) {
@@ -586,10 +659,10 @@ static void process_one_req(struct work_struct *_work)
}
int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr, int timeout_ms,
+ struct rdma_dev_addr *addr, unsigned long timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
- void *context)
+ bool resolve_by_gid_attr, void *context)
{
struct sockaddr *src_in, *dst_in;
struct addr_req *req;
@@ -617,10 +690,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
req->addr = addr;
req->callback = callback;
req->context = context;
+ req->resolve_by_gid_attr = resolve_by_gid_attr;
INIT_DELAYED_WORK(&req->work, process_one_req);
req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
- req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
+ req->status = addr_resolve(src_in, dst_in, addr, true,
+ req->resolve_by_gid_attr, req->seq);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -641,25 +716,53 @@ err:
}
EXPORT_SYMBOL(rdma_resolve_ip);
-int rdma_resolve_ip_route(struct sockaddr *src_addr,
- const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr)
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+ const struct ib_gid_attr *attr)
{
- struct sockaddr_storage ssrc_addr = {};
- struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid, dgid;
+ struct rdma_dev_addr dev_addr = {};
+ int ret;
- if (src_addr) {
- if (src_addr->sa_family != dst_addr->sa_family)
- return -EINVAL;
+ if (rec->roce.route_resolved)
+ return 0;
- memcpy(src_in, src_addr, rdma_addr_size(src_addr));
- } else {
- src_in->sa_family = dst_addr->sa_family;
- }
+ rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
+ rdma_gid2ip(&dgid._sockaddr, &rec->dgid);
+
+ if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
+ return -EINVAL;
+
+ if (!attr || !attr->ndev)
+ return -EINVAL;
+
+ dev_addr.net = &init_net;
+ dev_addr.sgid_attr = attr;
+
+ ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
+ &dev_addr, false, true, 0);
+ if (ret)
+ return ret;
+
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+ return -EINVAL;
- return addr_resolve(src_in, dst_addr, addr, false, 0);
+ rec->roce.route_resolved = true;
+ return 0;
}
+/**
+ * rdma_addr_cancel - Cancel resolve ip request
+ * @addr: Pointer to address structure given previously
+ * during rdma_resolve_ip().
+ * rdma_addr_cancel() is synchronous function which cancels any pending
+ * request if there is any.
+ */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
@@ -687,11 +790,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
* guarentees no work is running and none will be started.
*/
cancel_delayed_work_sync(&found->work);
-
- if (found->callback)
- found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
- found->addr, found->context);
-
kfree(found);
}
EXPORT_SYMBOL(rdma_addr_cancel);
@@ -710,7 +808,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
- u8 *dmac, const struct net_device *ndev,
+ u8 *dmac, const struct ib_gid_attr *sgid_attr,
int *hoplimit)
{
struct rdma_dev_addr dev_addr;
@@ -726,12 +824,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.bound_dev_if = ndev->ifindex;
dev_addr.net = &init_net;
+ dev_addr.sgid_attr = sgid_attr;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
- &dev_addr, 1000, resolve_cb, &ctx);
+ &dev_addr, 1000, resolve_cb, true, &ctx);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 0bee1f4b914e..5b2fce4a7091 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -212,9 +212,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
u8 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
- pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
- device->name, port_num, entry->attr.index,
- entry->attr.gid.raw);
+ dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
+ port_num, entry->attr.index, entry->attr.gid.raw);
if (rdma_cap_roce_gid_table(device, port_num) &&
entry->state != GID_TABLE_ENTRY_INVALID)
@@ -289,9 +288,9 @@ static void store_gid_entry(struct ib_gid_table *table,
{
entry->state = GID_TABLE_ENTRY_VALID;
- pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
- entry->attr.device->name, entry->attr.port_num,
- entry->attr.index, entry->attr.gid.raw);
+ dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
+ __func__, entry->attr.port_num, entry->attr.index,
+ entry->attr.gid.raw);
lockdep_assert_held(&table->lock);
write_lock_irq(&table->rwlock);
@@ -320,17 +319,16 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
int ret;
if (!attr->ndev) {
- pr_err("%s NULL netdev device=%s port=%d index=%d\n",
- __func__, attr->device->name, attr->port_num,
- attr->index);
+ dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
+ __func__, attr->port_num, attr->index);
return -EINVAL;
}
if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
ret = attr->device->add_gid(attr, &entry->context);
if (ret) {
- pr_err("%s GID add failed device=%s port=%d index=%d\n",
- __func__, attr->device->name, attr->port_num,
- attr->index);
+ dev_err(&attr->device->dev,
+ "%s GID add failed port=%d index=%d\n",
+ __func__, attr->port_num, attr->index);
return ret;
}
}
@@ -338,6 +336,38 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
}
/**
+ * del_gid - Delete GID table entry
+ *
+ * @ib_dev: IB device whose GID entry to be deleted
+ * @port: Port number of the IB device
+ * @table: GID table of the IB device for a port
+ * @ix: GID entry index to delete
+ *
+ */
+static void del_gid(struct ib_device *ib_dev, u8 port,
+ struct ib_gid_table *table, int ix)
+{
+ struct ib_gid_table_entry *entry;
+
+ lockdep_assert_held(&table->lock);
+
+ dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
+ ix, table->data_vec[ix]->attr.gid.raw);
+
+ write_lock_irq(&table->rwlock);
+ entry = table->data_vec[ix];
+ entry->state = GID_TABLE_ENTRY_PENDING_DEL;
+ /*
+ * For non RoCE protocol, GID entry slot is ready to use.
+ */
+ if (!rdma_protocol_roce(ib_dev, port))
+ table->data_vec[ix] = NULL;
+ write_unlock_irq(&table->rwlock);
+
+ put_gid_entry_locked(entry);
+}
+
+/**
* add_modify_gid - Add or modify GID table entry
*
* @table: GID table in which GID to be added or modified
@@ -358,7 +388,7 @@ static int add_modify_gid(struct ib_gid_table *table,
* this index.
*/
if (is_gid_entry_valid(table->data_vec[attr->index]))
- put_gid_entry(table->data_vec[attr->index]);
+ del_gid(attr->device, attr->port_num, table, attr->index);
/*
* Some HCA's report multiple GID entries with only one valid GID, and
@@ -386,39 +416,6 @@ done:
return ret;
}
-/**
- * del_gid - Delete GID table entry
- *
- * @ib_dev: IB device whose GID entry to be deleted
- * @port: Port number of the IB device
- * @table: GID table of the IB device for a port
- * @ix: GID entry index to delete
- *
- */
-static void del_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix)
-{
- struct ib_gid_table_entry *entry;
-
- lockdep_assert_held(&table->lock);
-
- pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
- ib_dev->name, port, ix,
- table->data_vec[ix]->attr.gid.raw);
-
- write_lock_irq(&table->rwlock);
- entry = table->data_vec[ix];
- entry->state = GID_TABLE_ENTRY_PENDING_DEL;
- /*
- * For non RoCE protocol, GID entry slot is ready to use.
- */
- if (!rdma_protocol_roce(ib_dev, port))
- table->data_vec[ix] = NULL;
- write_unlock_irq(&table->rwlock);
-
- put_gid_entry_locked(entry);
-}
-
/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
@@ -782,9 +779,9 @@ static void release_gid_table(struct ib_device *device, u8 port,
if (is_gid_entry_free(table->data_vec[i]))
continue;
if (kref_read(&table->data_vec[i]->kref) > 1) {
- pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
- device->name, i,
- kref_read(&table->data_vec[i]->kref));
+ dev_err(&device->dev,
+ "GID entry ref leak for index %d ref=%d\n", i,
+ kref_read(&table->data_vec[i]->kref));
leak = true;
}
}
@@ -1252,6 +1249,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
}
EXPORT_SYMBOL(rdma_hold_gid_attr);
+/**
+ * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
+ * which must be in UP state.
+ *
+ * @attr:Pointer to the GID attribute
+ *
+ * Returns pointer to netdevice if the netdevice was attached to GID and
+ * netdevice is in UP state. Caller must hold RCU lock as this API
+ * reads the netdev flags which can change while netdevice migrates to
+ * different net namespace. Returns ERR_PTR with error code otherwise.
+ *
+ */
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+ struct ib_device *device = entry->attr.device;
+ struct net_device *ndev = ERR_PTR(-ENODEV);
+ u8 port_num = entry->attr.port_num;
+ struct ib_gid_table *table;
+ unsigned long flags;
+ bool valid;
+
+ table = rdma_gid_table(device, port_num);
+
+ read_lock_irqsave(&table->rwlock, flags);
+ valid = is_gid_entry_valid(table->data_vec[attr->index]);
+ if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
+ ndev = attr->ndev;
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return ndev;
+}
+
static int config_non_roce_gid_cache(struct ib_device *device,
u8 port, int gid_tbl_len)
{
@@ -1270,8 +1300,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
continue;
ret = device->query_gid(device, port, i, &gid_attr.gid);
if (ret) {
- pr_warn("query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
+ dev_warn(&device->dev,
+ "query_gid failed (%d) for index %d\n", ret,
+ i);
goto err;
}
gid_attr.index = i;
@@ -1300,8 +1331,7 @@ static void ib_cache_update(struct ib_device *device,
ret = ib_query_port(device, port, tprops);
if (ret) {
- pr_warn("ib_query_port failed (%d) for %s\n",
- ret, device->name);
+ dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
goto err;
}
@@ -1323,8 +1353,9 @@ static void ib_cache_update(struct ib_device *device,
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
- pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
- ret, device->name, i);
+ dev_warn(&device->dev,
+ "ib_query_pkey failed (%d) for index %d\n",
+ ret, i);
goto err;
}
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 6e39c27dca8e..edb2cb758be7 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3292,8 +3292,11 @@ static int cm_lap_handler(struct cm_work *work)
if (ret)
goto unlock;
- cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
- cm_id_priv);
+ ret = cm_init_av_by_path(param->alternate_path, NULL,
+ &cm_id_priv->alt_av, cm_id_priv);
+ if (ret)
+ goto unlock;
+
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -4367,7 +4370,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
- "%s", ib_device->name);
+ "%s", dev_name(&ib_device->dev));
if (IS_ERR(cm_dev->device)) {
kfree(cm_dev);
return;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f72677291b69..15d5bb7bf6bb 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -639,13 +639,21 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}
-static int cma_acquire_dev(struct rdma_id_private *id_priv,
- const struct rdma_id_private *listen_id_priv)
+/**
+ * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
+ * based on source ip address.
+ * @id_priv: cm_id which should be bound to cma device
+ *
+ * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
+ * based on source IP address. It returns 0 on success or error code otherwise.
+ * It is applicable to active and passive side cm_id.
+ */
+static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
const struct ib_gid_attr *sgid_attr;
- struct cma_device *cma_dev;
union ib_gid gid, iboe_gid, *gidp;
+ struct cma_device *cma_dev;
enum ib_gid_type gid_type;
int ret = -ENODEV;
u8 port;
@@ -654,41 +662,125 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
id_priv->id.ps == RDMA_PS_IPOIB)
return -EINVAL;
- mutex_lock(&lock);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
- rdma_addr_gid_offset(dev_addr), sizeof gid);
-
- if (listen_id_priv) {
- cma_dev = listen_id_priv->cma_dev;
- port = listen_id_priv->id.port_num;
- gidp = rdma_protocol_roce(cma_dev->device, port) ?
- &iboe_gid : &gid;
- gid_type = listen_id_priv->gid_type;
- sgid_attr = cma_validate_port(cma_dev->device, port,
- gid_type, gidp, id_priv);
- if (!IS_ERR(sgid_attr)) {
- id_priv->id.port_num = port;
- cma_bind_sgid_attr(id_priv, sgid_attr);
- ret = 0;
- goto out;
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ mutex_lock(&lock);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ for (port = rdma_start_port(cma_dev->device);
+ port <= rdma_end_port(cma_dev->device); port++) {
+ gidp = rdma_protocol_roce(cma_dev->device, port) ?
+ &iboe_gid : &gid;
+ gid_type = cma_dev->default_gid_type[port - 1];
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, gidp, id_priv);
+ if (!IS_ERR(sgid_attr)) {
+ id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ cma_attach_to_dev(id_priv, cma_dev);
+ ret = 0;
+ goto out;
+ }
}
}
+out:
+ mutex_unlock(&lock);
+ return ret;
+}
+
+/**
+ * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
+ * @id_priv: cm id to bind to cma device
+ * @listen_id_priv: listener cm id to match against
+ * @req: Pointer to req structure containaining incoming
+ * request information
+ * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
+ * rdma device matches for listen_id and incoming request. It also verifies
+ * that a GID table entry is present for the source address.
+ * Returns 0 on success, or returns error code otherwise.
+ */
+static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
+ const struct rdma_id_private *listen_id_priv,
+ struct cma_req_info *req)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
+ enum ib_gid_type gid_type;
+ union ib_gid gid;
+
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ if (rdma_protocol_roce(req->device, req->port))
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &gid);
+ else
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
+ sgid_attr = cma_validate_port(req->device, req->port,
+ gid_type, &gid, id_priv);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+
+ id_priv->id.port_num = req->port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ /* Need to acquire lock to protect against reader
+ * of cma_dev->id_list such as cma_netdev_callback() and
+ * cma_process_remove().
+ */
+ mutex_lock(&lock);
+ cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
+ mutex_unlock(&lock);
+ return 0;
+}
+
+static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
+ const struct rdma_id_private *listen_id_priv)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
+ struct cma_device *cma_dev;
+ enum ib_gid_type gid_type;
+ int ret = -ENODEV;
+ union ib_gid gid;
+ u8 port;
+
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ mutex_lock(&lock);
+
+ cma_dev = listen_id_priv->cma_dev;
+ port = listen_id_priv->id.port_num;
+ gid_type = listen_id_priv->gid_type;
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, &gid, id_priv);
+ if (!IS_ERR(sgid_attr)) {
+ id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
+ goto out;
+ }
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
- if (listen_id_priv &&
- listen_id_priv->cma_dev == cma_dev &&
+ if (listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
- gidp = rdma_protocol_roce(cma_dev->device, port) ?
- &iboe_gid : &gid;
gid_type = cma_dev->default_gid_type[port - 1];
sgid_attr = cma_validate_port(cma_dev->device, port,
- gid_type, gidp, id_priv);
+ gid_type, &gid, id_priv);
if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
cma_bind_sgid_attr(id_priv, sgid_attr);
@@ -724,6 +816,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
dgid = (union ib_gid *) &addr->sib_addr;
pkey = ntohs(addr->sib_pkey);
+ mutex_lock(&lock);
list_for_each_entry(cur_dev, &dev_list, list) {
for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
if (!rdma_cap_af_ib(cur_dev->device, p))
@@ -750,18 +843,19 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
+ goto found;
}
}
}
}
-
- if (!cma_dev)
- return -ENODEV;
+ mutex_unlock(&lock);
+ return -ENODEV;
found:
cma_attach_to_dev(id_priv, cma_dev);
- addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
- memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+ mutex_unlock(&lock);
+ addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
+ memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
return 0;
}
@@ -783,10 +877,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- if (caller)
- id_priv->res.kern_name = caller;
- else
- rdma_restrack_set_task(&id_priv->res, current);
+ rdma_restrack_set_task(&id_priv->res, caller);
id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
@@ -1460,18 +1551,35 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
return rdma_protocol_roce(device, port_num);
}
+static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
+{
+ const struct sockaddr *daddr =
+ (const struct sockaddr *)&req->listen_addr_storage;
+ const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
+
+ /* Returns true if the req is for IPv6 link local */
+ return (daddr->sa_family == AF_INET6 &&
+ (ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
+}
+
static bool cma_match_net_dev(const struct rdma_cm_id *id,
const struct net_device *net_dev,
- u8 port_num)
+ const struct cma_req_info *req)
{
const struct rdma_addr *addr = &id->route.addr;
if (!net_dev)
/* This request is an AF_IB request */
- return (!id->port_num || id->port_num == port_num) &&
+ return (!id->port_num || id->port_num == req->port) &&
(addr->src_addr.ss_family == AF_IB);
/*
+ * If the request is not for IPv6 link local, allow matching
+ * request to any netdevice of the one or multiport rdma device.
+ */
+ if (!cma_is_req_ipv6_ll(req))
+ return true;
+ /*
* Net namespaces must match, and if the listner is listening
* on a specific netdevice than netdevice must match as well.
*/
@@ -1498,13 +1606,14 @@ static struct rdma_id_private *cma_find_listener(
hlist_for_each_entry(id_priv, &bind_list->owners, node) {
if (cma_match_private_data(id_priv, ib_event->private_data)) {
if (id_priv->id.device == cm_id->device &&
- cma_match_net_dev(&id_priv->id, net_dev, req->port))
+ cma_match_net_dev(&id_priv->id, net_dev, req))
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
listen_list) {
if (id_priv_dev->id.device == cm_id->device &&
- cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
+ cma_match_net_dev(&id_priv_dev->id,
+ net_dev, req))
return id_priv_dev;
}
}
@@ -1516,18 +1625,18 @@ static struct rdma_id_private *cma_find_listener(
static struct rdma_id_private *
cma_ib_id_from_event(struct ib_cm_id *cm_id,
const struct ib_cm_event *ib_event,
+ struct cma_req_info *req,
struct net_device **net_dev)
{
- struct cma_req_info req;
struct rdma_bind_list *bind_list;
struct rdma_id_private *id_priv;
int err;
- err = cma_save_req_info(ib_event, &req);
+ err = cma_save_req_info(ib_event, req);
if (err)
return ERR_PTR(err);
- *net_dev = cma_get_net_dev(ib_event, &req);
+ *net_dev = cma_get_net_dev(ib_event, req);
if (IS_ERR(*net_dev)) {
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
@@ -1565,17 +1674,17 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
- (struct sockaddr *)&req.listen_addr_storage,
- (struct sockaddr *)&req.src_addr_storage)) {
+ (struct sockaddr *)&req->listen_addr_storage,
+ (struct sockaddr *)&req->src_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
}
bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
- rdma_ps_from_service_id(req.service_id),
- cma_port_from_service_id(req.service_id));
- id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
+ rdma_ps_from_service_id(req->service_id),
+ cma_port_from_service_id(req->service_id));
+ id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
err:
rcu_read_unlock();
if (IS_ERR(id_priv) && *net_dev) {
@@ -1708,8 +1817,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_lock(&id_priv->handler_mutex);
mutex_unlock(&id_priv->handler_mutex);
+ rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev) {
- rdma_restrack_del(&id_priv->res);
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1900,7 +2009,7 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
- rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
+ rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
} else {
if (!cma_protocol_roce(listen_id) &&
cma_any_addr(cma_src_addr(id_priv))) {
@@ -1950,7 +2059,7 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
goto err;
if (net_dev) {
- rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
+ rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
} else {
if (!cma_any_addr(cma_src_addr(id_priv))) {
ret = cma_translate_addr(cma_src_addr(id_priv),
@@ -1997,11 +2106,12 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
{
struct rdma_id_private *listen_id, *conn_id = NULL;
struct rdma_cm_event event = {};
+ struct cma_req_info req = {};
struct net_device *net_dev;
u8 offset;
int ret;
- listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
+ listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
@@ -2034,7 +2144,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- ret = cma_acquire_dev(conn_id, listen_id);
+ ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
if (ret)
goto err2;
@@ -2230,7 +2340,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
}
- ret = cma_acquire_dev(conn_id, listen_id);
+ ret = cma_iw_acquire_dev(conn_id, listen_id);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -2352,8 +2462,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
ret = rdma_listen(id, id_priv->backlog);
if (ret)
- pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
- ret, cma_dev->device->name);
+ dev_warn(&cma_dev->device->dev,
+ "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
}
static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2400,8 +2510,8 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec,
queue_work(cma_wq, &work->work);
}
-static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
- struct cma_work *work)
+static int cma_query_ib_route(struct rdma_id_private *id_priv,
+ unsigned long timeout_ms, struct cma_work *work)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct sa_path_rec path_rec;
@@ -2519,7 +2629,8 @@ static void cma_init_resolve_addr_work(struct cma_work *work,
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
}
-static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
+ unsigned long timeout_ms)
{
struct rdma_route *route = &id_priv->id.route;
struct cma_work *work;
@@ -2641,7 +2752,7 @@ err:
}
EXPORT_SYMBOL(rdma_set_ib_path);
-static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
{
struct cma_work *work;
@@ -2742,7 +2853,7 @@ err1:
return ret;
}
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
int ret;
@@ -2757,7 +2868,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
else if (rdma_protocol_roce(id->device, id->port_num))
ret = cma_resolve_iboe_route(id_priv);
else if (rdma_protocol_iwarp(id->device, id->port_num))
- ret = cma_resolve_iw_route(id_priv, timeout_ms);
+ ret = cma_resolve_iw_route(id_priv);
else
ret = -ENOSYS;
@@ -2860,7 +2971,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
if (!status && !id_priv->cma_dev) {
- status = cma_acquire_dev(id_priv, NULL);
+ status = cma_acquire_dev_by_src_ip(id_priv);
if (status)
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
status);
@@ -2880,13 +2991,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (id_priv->id.event_handler(&id_priv->id, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
rdma_destroy_id(&id_priv->id);
return;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
}
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
@@ -2964,7 +3073,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, int timeout_ms)
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
int ret;
@@ -2983,16 +3092,16 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
return -EINVAL;
memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- atomic_inc(&id_priv->refcount);
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
- ret = rdma_resolve_ip(cma_src_addr(id_priv),
- dst_addr, &id->route.addr.dev_addr,
- timeout_ms, addr_handler, id_priv);
+ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
+ &id->route.addr.dev_addr,
+ timeout_ms, addr_handler,
+ false, id_priv);
}
}
if (ret)
@@ -3001,7 +3110,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
- cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_addr);
@@ -3412,7 +3520,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err1;
- ret = cma_acquire_dev(id_priv, NULL);
+ ret = cma_acquire_dev_by_src_ip(id_priv);
if (ret)
goto err1;
}
@@ -3437,10 +3545,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (id_priv->cma_dev) {
- rdma_restrack_del(&id_priv->res);
+ rdma_restrack_del(&id_priv->res);
+ if (id_priv->cma_dev)
cma_release_dev(id_priv);
- }
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
@@ -3837,10 +3944,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
id_priv = container_of(id, struct rdma_id_private, id);
- if (caller)
- id_priv->res.kern_name = caller;
- else
- rdma_restrack_set_task(&id_priv->res, current);
+ rdma_restrack_set_task(&id_priv->res, caller);
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
@@ -4085,9 +4189,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
(!ib_sa_sendonly_fullmem_support(&sa_client,
id_priv->id.device,
id_priv->id.port_num))) {
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
- id_priv->id.device->name, id_priv->id.port_num);
+ dev_warn(
+ &id_priv->id.device->dev,
+ "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
+ id_priv->id.port_num);
return -EOPNOTSUPP;
}
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index eee38b40be99..8c2dfb3e294e 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -65,7 +65,7 @@ static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
{
- return !strcmp(ib_dev->name, cookie);
+ return !strcmp(dev_name(&ib_dev->dev), cookie);
}
static int cma_configfs_params_get(struct config_item *item,
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 77c7005c396c..bb9007a0cca7 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -44,7 +44,7 @@
#include "mad_priv.h"
/* Total number of ports combined across all struct ib_devices's */
-#define RDMA_MAX_PORTS 1024
+#define RDMA_MAX_PORTS 8192
struct pkey_index_qp_list {
struct list_head pkey_index_list;
@@ -87,6 +87,7 @@ int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
+int ib_device_rename(struct ib_device *ibdev, const char *name);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
@@ -338,7 +339,14 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
- u8 *dmac, const struct net_device *ndev,
+ u8 *dmac, const struct ib_gid_attr *sgid_attr,
int *hoplimit);
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+ const struct net_device *dev);
+struct sa_path_rec;
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+ const struct ib_gid_attr *attr);
+
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index af5ad6a56ae4..b1e5365ddafa 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
IB_POLL_BATCH);
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
- queue_work(ib_comp_wq, &cq->work);
+ queue_work(cq->comp_wq, &cq->work);
}
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
- queue_work(ib_comp_wq, &cq->work);
+ queue_work(cq->comp_wq, &cq->work);
}
/**
@@ -161,7 +161,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
goto out_destroy_cq;
cq->res.type = RDMA_RESTRACK_CQ;
- cq->res.kern_name = caller;
+ rdma_restrack_set_task(&cq->res, caller);
rdma_restrack_add(&cq->res);
switch (cq->poll_ctx) {
@@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
break;
case IB_POLL_WORKQUEUE:
+ case IB_POLL_UNBOUND_WORKQUEUE:
cq->comp_handler = ib_cq_completion_workqueue;
INIT_WORK(&cq->work, ib_cq_poll_work);
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
+ ib_comp_wq : ib_comp_unbound_wq;
break;
default:
ret = -EINVAL;
@@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
+ case IB_POLL_UNBOUND_WORKQUEUE:
cancel_work_sync(&cq->work);
break;
default:
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index db3b6271f09d..5e70f5e1cfd9 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -61,6 +61,7 @@ struct ib_client_data {
};
struct workqueue_struct *ib_comp_wq;
+struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
@@ -122,8 +123,9 @@ static int ib_device_check_mandatory(struct ib_device *device)
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
- pr_warn("Device %s is missing mandatory function %s\n",
- device->name, mandatory_table[i].name);
+ dev_warn(&device->dev,
+ "Device is missing mandatory function %s\n",
+ mandatory_table[i].name);
return -EINVAL;
}
}
@@ -163,16 +165,40 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
struct ib_device *device;
list_for_each_entry(device, &device_list, core_list)
- if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
+ if (!strcmp(name, dev_name(&device->dev)))
return device;
return NULL;
}
-static int alloc_name(char *name)
+int ib_device_rename(struct ib_device *ibdev, const char *name)
+{
+ struct ib_device *device;
+ int ret = 0;
+
+ if (!strcmp(name, dev_name(&ibdev->dev)))
+ return ret;
+
+ mutex_lock(&device_mutex);
+ list_for_each_entry(device, &device_list, core_list) {
+ if (!strcmp(name, dev_name(&device->dev))) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
+ ret = device_rename(&ibdev->dev, name);
+ if (ret)
+ goto out;
+ strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+out:
+ mutex_unlock(&device_mutex);
+ return ret;
+}
+
+static int alloc_name(struct ib_device *ibdev, const char *name)
{
unsigned long *inuse;
- char buf[IB_DEVICE_NAME_MAX];
struct ib_device *device;
int i;
@@ -181,24 +207,21 @@ static int alloc_name(char *name)
return -ENOMEM;
list_for_each_entry(device, &device_list, core_list) {
- if (!sscanf(device->name, name, &i))
+ char buf[IB_DEVICE_NAME_MAX];
+
+ if (sscanf(dev_name(&device->dev), name, &i) != 1)
continue;
if (i < 0 || i >= PAGE_SIZE * 8)
continue;
snprintf(buf, sizeof buf, name, i);
- if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
+ if (!strcmp(buf, dev_name(&device->dev)))
set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
free_page((unsigned long) inuse);
- snprintf(buf, sizeof buf, name, i);
-
- if (__ib_device_get_by_name(buf))
- return -ENFILE;
- strlcpy(name, buf, IB_DEVICE_NAME_MAX);
- return 0;
+ return dev_set_name(&ibdev->dev, name, i);
}
static void ib_device_release(struct device *device)
@@ -221,9 +244,7 @@ static void ib_device_release(struct device *device)
static int ib_device_uevent(struct device *device,
struct kobj_uevent_env *env)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- if (add_uevent_var(env, "NAME=%s", dev->name))
+ if (add_uevent_var(env, "NAME=%s", dev_name(device)))
return -ENOMEM;
/*
@@ -269,7 +290,7 @@ struct ib_device *ib_alloc_device(size_t size)
INIT_LIST_HEAD(&device->event_handler_list);
spin_lock_init(&device->event_handler_lock);
- spin_lock_init(&device->client_data_lock);
+ rwlock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list);
@@ -285,6 +306,7 @@ EXPORT_SYMBOL(ib_alloc_device);
*/
void ib_dealloc_device(struct ib_device *device)
{
+ WARN_ON(!list_empty(&device->client_data_list));
WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
device->reg_state != IB_DEV_UNINITIALIZED);
rdma_restrack_clean(&device->res);
@@ -295,9 +317,8 @@ EXPORT_SYMBOL(ib_dealloc_device);
static int add_client_context(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
- unsigned long flags;
- context = kmalloc(sizeof *context, GFP_KERNEL);
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return -ENOMEM;
@@ -306,9 +327,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
context->going_down = false;
down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
+ write_lock_irq(&device->client_data_lock);
list_add(&context->list, &device->client_data_list);
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem);
return 0;
@@ -453,9 +474,9 @@ static u32 __dev_new_index(void)
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
-int ib_register_device(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *))
+int ib_register_device(struct ib_device *device, const char *name,
+ int (*port_callback)(struct ib_device *, u8,
+ struct kobject *))
{
int ret;
struct ib_client *client;
@@ -494,11 +515,20 @@ int ib_register_device(struct ib_device *device,
mutex_lock(&device_mutex);
- if (strchr(device->name, '%')) {
- ret = alloc_name(device->name);
+ if (strchr(name, '%')) {
+ ret = alloc_name(device, name);
if (ret)
goto out;
+ } else {
+ ret = dev_set_name(&device->dev, name);
+ if (ret)
+ goto out;
+ }
+ if (__ib_device_get_by_name(dev_name(&device->dev))) {
+ ret = -ENFILE;
+ goto out;
}
+ strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
if (ib_device_check_mandatory(device)) {
ret = -EINVAL;
@@ -507,40 +537,45 @@ int ib_register_device(struct ib_device *device,
ret = read_port_immutable(device);
if (ret) {
- pr_warn("Couldn't create per port immutable data %s\n",
- device->name);
+ dev_warn(&device->dev,
+ "Couldn't create per port immutable data\n");
goto out;
}
ret = setup_port_pkey_list(device);
if (ret) {
- pr_warn("Couldn't create per port_pkey_list\n");
+ dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
goto out;
}
ret = ib_cache_setup_one(device);
if (ret) {
- pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
+ dev_warn(&device->dev,
+ "Couldn't set up InfiniBand P_Key/GID cache\n");
goto port_cleanup;
}
+ device->index = __dev_new_index();
+
ret = ib_device_register_rdmacg(device);
if (ret) {
- pr_warn("Couldn't register device with rdma cgroup\n");
+ dev_warn(&device->dev,
+ "Couldn't register device with rdma cgroup\n");
goto cache_cleanup;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
- pr_warn("Couldn't query the device attributes\n");
+ dev_warn(&device->dev,
+ "Couldn't query the device attributes\n");
goto cg_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
- pr_warn("Couldn't register device %s with driver model\n",
- device->name);
+ dev_warn(&device->dev,
+ "Couldn't register device with driver model\n");
goto cg_cleanup;
}
@@ -550,7 +585,6 @@ int ib_register_device(struct ib_device *device,
if (!add_client_context(device, client) && client->add)
client->add(device);
- device->index = __dev_new_index();
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
@@ -585,21 +619,20 @@ void ib_unregister_device(struct ib_device *device)
down_write(&lists_rwsem);
list_del(&device->core_list);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+ write_lock_irq(&device->client_data_lock);
+ list_for_each_entry(context, &device->client_data_list, list)
context->going_down = true;
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ write_unlock_irq(&device->client_data_lock);
downgrade_write(&lists_rwsem);
- list_for_each_entry_safe(context, tmp, &device->client_data_list,
- list) {
+ list_for_each_entry(context, &device->client_data_list, list) {
if (context->client->remove)
context->client->remove(device, context->data);
}
up_read(&lists_rwsem);
- ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
+ ib_device_unregister_rdmacg(device);
mutex_unlock(&device_mutex);
@@ -609,10 +642,13 @@ void ib_unregister_device(struct ib_device *device)
kfree(device->port_pkey_list);
down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+ write_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry_safe(context, tmp, &device->client_data_list,
+ list) {
+ list_del(&context->list);
kfree(context);
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ }
+ write_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem);
device->reg_state = IB_DEV_UNREGISTERED;
@@ -662,9 +698,8 @@ EXPORT_SYMBOL(ib_register_client);
*/
void ib_unregister_client(struct ib_client *client)
{
- struct ib_client_data *context, *tmp;
+ struct ib_client_data *context;
struct ib_device *device;
- unsigned long flags;
mutex_lock(&device_mutex);
@@ -676,14 +711,14 @@ void ib_unregister_client(struct ib_client *client)
struct ib_client_data *found_context = NULL;
down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+ write_lock_irq(&device->client_data_lock);
+ list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
context->going_down = true;
found_context = context;
break;
}
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem);
if (client->remove)
@@ -691,17 +726,18 @@ void ib_unregister_client(struct ib_client *client)
found_context->data : NULL);
if (!found_context) {
- pr_warn("No client context found for %s/%s\n",
- device->name, client->name);
+ dev_warn(&device->dev,
+ "No client context found for %s\n",
+ client->name);
continue;
}
down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
+ write_lock_irq(&device->client_data_lock);
list_del(&found_context->list);
- kfree(found_context);
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem);
+ kfree(found_context);
}
mutex_unlock(&device_mutex);
@@ -722,13 +758,13 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
void *ret = NULL;
unsigned long flags;
- spin_lock_irqsave(&device->client_data_lock, flags);
+ read_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
ret = context->data;
break;
}
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ read_unlock_irqrestore(&device->client_data_lock, flags);
return ret;
}
@@ -749,18 +785,18 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
struct ib_client_data *context;
unsigned long flags;
- spin_lock_irqsave(&device->client_data_lock, flags);
+ write_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
context->data = data;
goto out;
}
- pr_warn("No client context found for %s/%s\n",
- device->name, client->name);
+ dev_warn(&device->dev, "No client context found for %s\n",
+ client->name);
out:
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ write_unlock_irqrestore(&device->client_data_lock, flags);
}
EXPORT_SYMBOL(ib_set_client_data);
@@ -1166,10 +1202,19 @@ static int __init ib_core_init(void)
goto err;
}
+ ib_comp_unbound_wq =
+ alloc_workqueue("ib-comp-unb-wq",
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
+ WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_comp_unbound_wq) {
+ ret = -ENOMEM;
+ goto err_comp;
+ }
+
ret = class_register(&ib_class);
if (ret) {
pr_warn("Couldn't create InfiniBand device class\n");
- goto err_comp;
+ goto err_comp_unbound;
}
ret = rdma_nl_init();
@@ -1218,6 +1263,8 @@ err_ibnl:
rdma_nl_exit();
err_sysfs:
class_unregister(&ib_class);
+err_comp_unbound:
+ destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
err:
@@ -1236,6 +1283,7 @@ static void __exit ib_core_cleanup(void)
addr_cleanup();
rdma_nl_exit();
class_unregister(&ib_class);
+ destroy_workqueue(ib_comp_unbound_wq);
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index a077500f7f32..83ba0068e8bb 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -213,7 +213,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
device = pd->device;
if (!device->alloc_fmr || !device->dealloc_fmr ||
!device->map_phys_fmr || !device->unmap_fmr) {
- pr_info(PFX "Device %s does not support FMRs\n", device->name);
+ dev_info(&device->dev, "Device does not support FMRs\n");
return ERR_PTR(-ENOSYS);
}
@@ -257,7 +257,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
- pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
+ pool->worker =
+ kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
if (IS_ERR(pool->worker)) {
pr_warn(PFX "couldn't start cleanup kthread worker\n");
ret = PTR_ERR(pool->worker);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 5d676cff41f4..ba668d49c751 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -509,7 +509,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
cm_id->m_local_addr = cm_id->local_addr;
cm_id->m_remote_addr = cm_id->remote_addr;
- memcpy(pm_reg_msg.dev_name, cm_id->device->name,
+ memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
sizeof(pm_reg_msg.dev_name));
memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
sizeof(pm_reg_msg.if_name));
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef459f2f2eeb..d7025cd5be28 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -220,33 +220,37 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
int ret2, qpn;
u8 mgmt_class, vclass;
+ if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
+ (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
+ return ERR_PTR(-EPROTONOSUPPORT);
+
/* Validate parameters */
qpn = get_spl_qp_index(qp_type);
if (qpn == -1) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: invalid QP Type %d\n",
- qp_type);
+ dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
+ __func__, qp_type);
goto error1;
}
if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: invalid RMPP Version %u\n",
- rmpp_version);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: invalid RMPP Version %u\n",
+ __func__, rmpp_version);
goto error1;
}
/* Validate MAD registration request if supplied */
if (mad_reg_req) {
if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: invalid Class Version %u\n",
- mad_reg_req->mgmt_class_version);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: invalid Class Version %u\n",
+ __func__,
+ mad_reg_req->mgmt_class_version);
goto error1;
}
if (!recv_handler) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: no recv_handler\n");
+ dev_dbg_ratelimited(&device->dev,
+ "%s: no recv_handler\n", __func__);
goto error1;
}
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
@@ -256,9 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
*/
if (mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid Mgmt Class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
} else if (mad_reg_req->mgmt_class == 0) {
@@ -266,8 +270,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* Class 0 is reserved in IBA and is used for
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
*/
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid Mgmt Class 0\n");
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid Mgmt Class 0\n",
+ __func__);
goto error1;
} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
/*
@@ -275,18 +280,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* ensure supplied OUI is not zero
*/
if (!is_vendor_oui(mad_reg_req->oui)) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: No OUI specified for class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: No OUI specified for class 0x%x\n",
+ __func__,
+ mad_reg_req->mgmt_class);
goto error1;
}
}
/* Make sure class supplied is consistent with RMPP */
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
if (rmpp_version) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: RMPP version for non-RMPP class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
}
@@ -297,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
(mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid SM QP type: class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
} else {
@@ -307,9 +313,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
(mad_reg_req->mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid GS QP type: class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
}
@@ -324,18 +330,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid port %d\n",
- port_num);
+ dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
+ __func__, port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
- /* Verify the QP requested is supported. For example, Ethernet devices
- * will not have QP0 */
+ /* Verify the QP requested is supported. For example, Ethernet devices
+ * will not have QP0.
+ */
if (!port_priv->qp_info[qpn].qp) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: QP %d not supported\n", qpn);
+ dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
+ __func__, qpn);
ret = ERR_PTR(-EPROTONOSUPPORT);
goto error1;
}
@@ -2408,7 +2414,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
}
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
- int timeout_ms)
+ unsigned long timeout_ms)
{
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
wait_for_response(mad_send_wr);
@@ -3183,7 +3189,7 @@ static int ib_mad_port_open(struct ib_device *device,
cq_size *= 2;
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
- IB_POLL_WORKQUEUE);
+ IB_POLL_UNBOUND_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d84ae1671898..216509036aa8 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -221,6 +221,6 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
- int timeout_ms);
+ unsigned long timeout_ms);
#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 3ccaae18ad75..724f5a62e82f 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -47,9 +47,9 @@ static struct {
const struct rdma_nl_cbs *cb_table;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
-int rdma_nl_chk_listeners(unsigned int group)
+bool rdma_nl_chk_listeners(unsigned int group)
{
- return (netlink_has_listeners(nls, group)) ? 0 : -1;
+ return netlink_has_listeners(nls, group);
}
EXPORT_SYMBOL(rdma_nl_chk_listeners);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0385ab438320..573399e3ccc1 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -179,7 +179,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
return -EMSGSIZE;
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
+ dev_name(&device->dev)))
return -EMSGSIZE;
return 0;
@@ -645,6 +646,36 @@ err:
return err;
}
+static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ u32 index;
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
+ char name[IB_DEVICE_NAME_MAX] = {};
+
+ nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ IB_DEVICE_NAME_MAX);
+ err = ib_device_rename(device, name);
+ }
+
+ put_device(&device->dev);
+ return err;
+}
+
static int _nldev_get_dumpit(struct ib_device *device,
struct sk_buff *skb,
struct netlink_callback *cb,
@@ -1077,6 +1108,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_get_doit,
.dump = nldev_get_dumpit,
},
+ [RDMA_NLDEV_CMD_SET] = {
+ .doit = nldev_set_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
[RDMA_NLDEV_CMD_PORT_GET] = {
.doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit,
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 6eb64c6f0802..752a55c6bdce 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -794,44 +794,6 @@ void uverbs_close_fd(struct file *f)
uverbs_uobject_put(uobj);
}
-static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
-{
- struct ib_device *ib_dev = ibcontext->device;
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the
- * task struct.
- */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
-
- down_write(&owning_mm->mmap_sem);
- ib_dev->disassociate_ucontext(ibcontext);
- up_write(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
-}
-
/*
* Drop the ucontext off the ufile and completely disconnect it from the
* ib_device
@@ -840,20 +802,28 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *ib_dev = ucontext->device;
int ret;
- if (reason == RDMA_REMOVE_DRIVER_REMOVE)
- ufile_disassociate_ucontext(ucontext);
+ /*
+ * If we are closing the FD then the user mmap VMAs must have
+ * already been destroyed as they hold on to the filep, otherwise
+ * they need to be zap'd.
+ */
+ if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
+ uverbs_user_mmap_disassociate(ufile);
+ if (ib_dev->disassociate_ucontext)
+ ib_dev->disassociate_ucontext(ucontext);
+ }
- put_pid(ucontext->tgid);
- ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+ ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
RDMACG_RESOURCE_HCA_HANDLE);
/*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return.
*/
- ret = ucontext->device->dealloc_ucontext(ucontext);
+ ret = ib_dev->dealloc_ucontext(ucontext);
WARN_ON(ret);
ufile->ucontext = NULL;
@@ -882,6 +852,8 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
if (!uverbs_destroy_uobject(obj, reason))
ret = 0;
+ else
+ atomic_set(&obj->usecnt, 0);
}
return ret;
}
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index f962f2a593ba..4886d2bba7c7 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -160,5 +160,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi);
void uverbs_destroy_api(struct uverbs_api *uapi);
void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs);
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3b7fa0ccaa08..06d8657ce583 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -50,8 +50,7 @@ void rdma_restrack_clean(struct rdma_restrack_root *res)
dev = container_of(res, struct ib_device, res);
pr_err("restrack: %s", CUT_HERE);
- pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
- dev->name);
+ dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
hash_for_each(res->hash, bkt, e, node) {
if (rdma_is_kernel_res(e)) {
owner = e->kern_name;
@@ -156,6 +155,21 @@ static bool res_is_user(struct rdma_restrack_entry *res)
}
}
+void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+ const char *caller)
+{
+ if (caller) {
+ res->kern_name = caller;
+ return;
+ }
+
+ if (res->task)
+ put_task_struct(res->task);
+ get_task_struct(current);
+ res->task = current;
+}
+EXPORT_SYMBOL(rdma_restrack_set_task);
+
void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
@@ -168,7 +182,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
if (res_is_user(res)) {
if (!res->task)
- rdma_restrack_set_task(res, current);
+ rdma_restrack_set_task(res, NULL);
res->kern_name = NULL;
} else {
set_kern_name(res);
@@ -209,7 +223,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
struct ib_device *dev;
if (!res->valid)
- return;
+ goto out;
dev = res_to_dev(res);
if (!dev)
@@ -222,8 +236,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
down_write(&dev->res.rwsem);
hash_del(&res->node);
res->valid = false;
- if (res->task)
- put_task_struct(res->task);
up_write(&dev->res.rwsem);
+
+out:
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
}
EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
index b1d4bbf4ce5c..cbaaaa92fff3 100644
--- a/drivers/infiniband/core/sa.h
+++ b/drivers/infiniband/core/sa.h
@@ -49,16 +49,14 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- u8 method,
+ struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ void *context, struct ib_sa_query **sa_query);
int mcast_init(void);
void mcast_cleanup(void);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7b794a14d6e8..be5ba5e15496 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -761,7 +761,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
/* Construct the family header first */
header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
- memcpy(header->device_name, query->port->agent->device->name,
+ memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
@@ -835,7 +835,6 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
void *data;
- int ret = 0;
struct ib_sa_mad *mad;
int len;
@@ -862,13 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
- if (!ret)
- ret = len;
- else
- ret = 0;
-
- return ret;
+ return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
}
static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
@@ -891,14 +884,12 @@ static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
ret = ib_nl_send_msg(query, gfp_mask);
- if (ret <= 0) {
+ if (ret) {
ret = -EIO;
/* Remove the request */
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_del(&query->list);
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- } else {
- ret = 0;
}
return ret;
@@ -1227,46 +1218,6 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-static int roce_resolve_route_from_path(struct sa_path_rec *rec,
- const struct ib_gid_attr *attr)
-{
- struct rdma_dev_addr dev_addr = {};
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } sgid_addr, dgid_addr;
- int ret;
-
- if (rec->roce.route_resolved)
- return 0;
- if (!attr || !attr->ndev)
- return -EINVAL;
-
- dev_addr.bound_dev_if = attr->ndev->ifindex;
- /* TODO: Use net from the ib_gid_attr once it is added to it,
- * until than, limit itself to init_net.
- */
- dev_addr.net = &init_net;
-
- rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
-
- /* validate the route */
- ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
- &dgid_addr._sockaddr, &dev_addr);
- if (ret)
- return ret;
-
- if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
- dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
- return -EINVAL;
-
- rec->roce.route_resolved = true;
- return 0;
-}
-
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr,
@@ -1409,7 +1360,8 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
spin_unlock_irqrestore(&tid_lock, flags);
}
-static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
+static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
+ gfp_t gfp_mask)
{
bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags;
@@ -1433,7 +1385,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
(!(query->flags & IB_SA_QUERY_OPA))) {
- if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
+ if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
}
@@ -1599,7 +1551,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct sa_path_rec *resp,
void *context),
@@ -1753,7 +1705,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_service_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_service_rec *resp,
void *context),
@@ -1850,7 +1802,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
@@ -1941,7 +1893,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
@@ -2108,7 +2060,7 @@ static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
}
static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
- int timeout_ms,
+ unsigned long timeout_ms,
void (*callback)(void *context),
void *context,
struct ib_sa_query **sa_query)
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 9b0bea8303e0..1143c0448666 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -685,9 +685,8 @@ static int ib_mad_agent_security_change(struct notifier_block *nb,
if (event != LSM_POLICY_CHANGE)
return NOTIFY_DONE;
- ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security,
- ag->device->name,
- ag->port_num);
+ ag->smp_allowed = !security_ib_endport_manage_subnet(
+ ag->security, dev_name(&ag->device->dev), ag->port_num);
return NOTIFY_OK;
}
@@ -708,7 +707,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
return 0;
ret = security_ib_endport_manage_subnet(agent->security,
- agent->device->name,
+ dev_name(&agent->device->dev),
agent->port_num);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7fd14ead7b37..f54f107ef668 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -512,7 +512,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
40 + offset / 8, sizeof(data));
if (ret < 0)
- return sprintf(buf, "N/A (no PMA)\n");
+ return ret;
switch (width) {
case 4:
@@ -1036,7 +1036,7 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
- device->ports_parent,
+ device->ports_kobj,
"%d", port_num);
if (ret) {
kfree(p);
@@ -1057,10 +1057,12 @@ static int add_port(struct ib_device *device, int port_num,
goto err_put;
}
- p->pma_table = get_counter_table(device, port_num);
- ret = sysfs_create_group(&p->kobj, p->pma_table);
- if (ret)
- goto err_put_gid_attrs;
+ if (device->process_mad) {
+ p->pma_table = get_counter_table(device, port_num);
+ ret = sysfs_create_group(&p->kobj, p->pma_table);
+ if (ret)
+ goto err_put_gid_attrs;
+ }
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -1173,7 +1175,8 @@ err_free_gid:
p->gid_group.attrs = NULL;
err_remove_pma:
- sysfs_remove_group(&p->kobj, p->pma_table);
+ if (p->pma_table)
+ sysfs_remove_group(&p->kobj, p->pma_table);
err_put_gid_attrs:
kobject_put(&p->gid_attr_group->kobj);
@@ -1183,7 +1186,7 @@ err_put:
return ret;
}
-static ssize_t show_node_type(struct device *device,
+static ssize_t node_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1198,8 +1201,9 @@ static ssize_t show_node_type(struct device *device,
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
+static DEVICE_ATTR_RO(node_type);
-static ssize_t show_sys_image_guid(struct device *device,
+static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1210,8 +1214,9 @@ static ssize_t show_sys_image_guid(struct device *device,
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
}
+static DEVICE_ATTR_RO(sys_image_guid);
-static ssize_t show_node_guid(struct device *device,
+static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1222,8 +1227,9 @@ static ssize_t show_node_guid(struct device *device,
be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
}
+static DEVICE_ATTR_RO(node_guid);
-static ssize_t show_node_desc(struct device *device,
+static ssize_t node_desc_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1231,9 +1237,9 @@ static ssize_t show_node_desc(struct device *device,
return sprintf(buf, "%.64s\n", dev->node_desc);
}
-static ssize_t set_node_desc(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t node_desc_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device_modify desc = {};
@@ -1249,8 +1255,9 @@ static ssize_t set_node_desc(struct device *device,
return count;
}
+static DEVICE_ATTR_RW(node_desc);
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1259,19 +1266,19 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
return strlen(buf);
}
+static DEVICE_ATTR_RO(fw_ver);
+
+static struct attribute *ib_dev_attrs[] = {
+ &dev_attr_node_type.attr,
+ &dev_attr_node_guid.attr,
+ &dev_attr_sys_image_guid.attr,
+ &dev_attr_fw_ver.attr,
+ &dev_attr_node_desc.attr,
+ NULL,
+};
-static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
-static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
-static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
-static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-
-static struct device_attribute *ib_class_attributes[] = {
- &dev_attr_node_type,
- &dev_attr_sys_image_guid,
- &dev_attr_node_guid,
- &dev_attr_node_desc,
- &dev_attr_fw_ver,
+static const struct attribute_group dev_attr_group = {
+ .attrs = ib_dev_attrs,
};
static void free_port_list_attributes(struct ib_device *device)
@@ -1285,7 +1292,9 @@ static void free_port_list_attributes(struct ib_device *device)
kfree(port->hw_stats);
free_hsag(&port->kobj, port->hw_stats_ag);
}
- sysfs_remove_group(p, port->pma_table);
+
+ if (port->pma_table)
+ sysfs_remove_group(p, port->pma_table);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
sysfs_remove_group(&port->gid_attr_group->kobj,
@@ -1296,7 +1305,7 @@ static void free_port_list_attributes(struct ib_device *device)
kobject_put(p);
}
- kobject_put(device->ports_parent);
+ kobject_put(device->ports_kobj);
}
int ib_device_register_sysfs(struct ib_device *device,
@@ -1307,23 +1316,15 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- ret = dev_set_name(class_dev, "%s", device->name);
- if (ret)
- return ret;
+ device->groups[0] = &dev_attr_group;
+ class_dev->groups = device->groups;
ret = device_add(class_dev);
if (ret)
goto err;
- for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
- ret = device_create_file(class_dev, ib_class_attributes[i]);
- if (ret)
- goto err_unregister;
- }
-
- device->ports_parent = kobject_create_and_add("ports",
- &class_dev->kobj);
- if (!device->ports_parent) {
+ device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj);
+ if (!device->ports_kobj) {
ret = -ENOMEM;
goto err_put;
}
@@ -1347,20 +1348,15 @@ int ib_device_register_sysfs(struct ib_device *device,
err_put:
free_port_list_attributes(device);
-
-err_unregister:
device_del(class_dev);
-
err:
return ret;
}
void ib_device_unregister_sysfs(struct ib_device *device)
{
- int i;
-
- /* Hold kobject until ib_dealloc_device() */
- kobject_get(&device->dev.kobj);
+ /* Hold device until ib_dealloc_device() */
+ get_device(&device->dev);
free_port_list_attributes(device);
@@ -1369,8 +1365,5 @@ void ib_device_unregister_sysfs(struct ib_device *device)
free_hsag(&device->dev.kobj, device->hw_stats_ag);
}
- for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
- device_remove_file(&device->dev, ib_class_attributes[i]);
-
device_unregister(&device->dev);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index ec8fb289621f..21863ddde63e 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -124,6 +124,8 @@ static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
static DEFINE_IDR(multicast_idr);
+static const struct file_operations ucma_fops;
+
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
{
@@ -1581,6 +1583,10 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
f = fdget(cmd.fd);
if (!f.file)
return -ENOENT;
+ if (f.file->f_op != &ucma_fops) {
+ ret = -EINVAL;
+ goto file_put;
+ }
/* Validate current fd and prevent destruction of id. */
ctx = ucma_get_ctx(f.file->private_data, cmd.id);
@@ -1753,6 +1759,8 @@ static int ucma_close(struct inode *inode, struct file *filp)
mutex_lock(&mut);
if (!ctx->closing) {
mutex_unlock(&mut);
+ ucma_put_ctx(ctx);
+ wait_for_completion(&ctx->comp);
/* rdma_destroy_id ensures that no event handlers are
* inflight for that id before releasing it.
*/
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a41792dbae1f..c6144df47ea4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -85,7 +85,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct page **page_list;
struct vm_area_struct **vma_list;
unsigned long lock_limit;
+ unsigned long new_pinned;
unsigned long cur_base;
+ struct mm_struct *mm;
unsigned long npages;
int ret;
int i;
@@ -107,25 +109,32 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
- umem = kzalloc(sizeof *umem, GFP_KERNEL);
- if (!umem)
- return ERR_PTR(-ENOMEM);
+ if (access & IB_ACCESS_ON_DEMAND) {
+ umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+ umem->is_odp = 1;
+ } else {
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+ }
umem->context = context;
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
umem->writable = ib_access_writable(access);
+ umem->owning_mm = mm = current->mm;
+ mmgrab(mm);
if (access & IB_ACCESS_ON_DEMAND) {
- ret = ib_umem_odp_get(context, umem, access);
+ ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
if (ret)
goto umem_kfree;
return umem;
}
- umem->odp_data = NULL;
-
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
@@ -144,25 +153,25 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->hugetlb = 0;
npages = ib_umem_num_pages(umem);
+ if (npages == 0 || npages > UINT_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- down_write(&current->mm->mmap_sem);
- current->mm->pinned_vm += npages;
- if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
- up_write(&current->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
+ (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
+ up_write(&mm->mmap_sem);
ret = -ENOMEM;
- goto vma;
+ goto out;
}
- up_write(&current->mm->mmap_sem);
+ mm->pinned_vm = new_pinned;
+ up_write(&mm->mmap_sem);
cur_base = addr & PAGE_MASK;
- if (npages == 0 || npages > UINT_MAX) {
- ret = -EINVAL;
- goto vma;
- }
-
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
goto vma;
@@ -172,14 +181,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
sg_list_start = umem->sg_head.sgl;
- down_read(&current->mm->mmap_sem);
while (npages) {
+ down_read(&mm->mmap_sem);
ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
gup_flags, page_list, vma_list);
if (ret < 0) {
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
goto umem_release;
}
@@ -187,17 +196,20 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base += ret * PAGE_SIZE;
npages -= ret;
+ /* Continue to hold the mmap_sem as vma_list access
+ * needs to be protected.
+ */
for_each_sg(sg_list_start, sg, ret, i) {
if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
umem->hugetlb = 0;
sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
}
+ up_read(&mm->mmap_sem);
/* preparing for next loop */
sg_list_start = sg;
}
- up_read(&current->mm->mmap_sem);
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
@@ -216,29 +228,40 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem_release:
__ib_umem_release(context->device, umem, 0);
vma:
- down_write(&current->mm->mmap_sem);
- current->mm->pinned_vm -= ib_umem_num_pages(umem);
- up_write(&current->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ mm->pinned_vm -= ib_umem_num_pages(umem);
+ up_write(&mm->mmap_sem);
out:
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
umem_kfree:
- if (ret)
+ if (ret) {
+ mmdrop(umem->owning_mm);
kfree(umem);
+ }
return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
-static void ib_umem_account(struct work_struct *work)
+static void __ib_umem_release_tail(struct ib_umem *umem)
+{
+ mmdrop(umem->owning_mm);
+ if (umem->is_odp)
+ kfree(to_ib_umem_odp(umem));
+ else
+ kfree(umem);
+}
+
+static void ib_umem_release_defer(struct work_struct *work)
{
struct ib_umem *umem = container_of(work, struct ib_umem, work);
- down_write(&umem->mm->mmap_sem);
- umem->mm->pinned_vm -= umem->diff;
- up_write(&umem->mm->mmap_sem);
- mmput(umem->mm);
- kfree(umem);
+ down_write(&umem->owning_mm->mmap_sem);
+ umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+ up_write(&umem->owning_mm->mmap_sem);
+
+ __ib_umem_release_tail(umem);
}
/**
@@ -248,52 +271,36 @@ static void ib_umem_account(struct work_struct *work)
void ib_umem_release(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
- struct mm_struct *mm;
- struct task_struct *task;
- unsigned long diff;
- if (umem->odp_data) {
- ib_umem_odp_release(umem);
+ if (umem->is_odp) {
+ ib_umem_odp_release(to_ib_umem_odp(umem));
+ __ib_umem_release_tail(umem);
return;
}
__ib_umem_release(umem->context->device, umem, 1);
- task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
-
- diff = ib_umem_num_pages(umem);
-
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
- * we defer the vm_locked accounting to the system workqueue.
+ * we defer the vm_locked accounting a workqueue.
*/
if (context->closing) {
- if (!down_write_trylock(&mm->mmap_sem)) {
- INIT_WORK(&umem->work, ib_umem_account);
- umem->mm = mm;
- umem->diff = diff;
-
+ if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
+ INIT_WORK(&umem->work, ib_umem_release_defer);
queue_work(ib_wq, &umem->work);
return;
}
- } else
- down_write(&mm->mmap_sem);
+ } else {
+ down_write(&umem->owning_mm->mmap_sem);
+ }
+ umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+ up_write(&umem->owning_mm->mmap_sem);
- mm->pinned_vm -= diff;
- up_write(&mm->mmap_sem);
- mmput(mm);
-out:
- kfree(umem);
+ __ib_umem_release_tail(umem);
}
EXPORT_SYMBOL(ib_umem_release);
@@ -303,7 +310,7 @@ int ib_umem_page_count(struct ib_umem *umem)
int n;
struct scatterlist *sg;
- if (umem->odp_data)
+ if (umem->is_odp)
return ib_umem_num_pages(umem);
n = 0;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6ec748eccff7..2b4c5e7dd5a1 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -58,7 +58,7 @@ static u64 node_start(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
- return ib_umem_start(umem_odp->umem);
+ return ib_umem_start(&umem_odp->umem);
}
/* Note that the representation of the intervals in the interval tree
@@ -71,140 +71,86 @@ static u64 node_last(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
- return ib_umem_end(umem_odp->umem) - 1;
+ return ib_umem_end(&umem_odp->umem) - 1;
}
INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
node_start, node_last, static, rbt_ib_umem)
-static void ib_umem_notifier_start_account(struct ib_umem *item)
+static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
{
- mutex_lock(&item->odp_data->umem_mutex);
-
- /* Only update private counters for this umem if it has them.
- * Otherwise skip it. All page faults will be delayed for this umem. */
- if (item->odp_data->mn_counters_active) {
- int notifiers_count = item->odp_data->notifiers_count++;
-
- if (notifiers_count == 0)
- /* Initialize the completion object for waiting on
- * notifiers. Since notifier_count is zero, no one
- * should be waiting right now. */
- reinit_completion(&item->odp_data->notifier_completion);
- }
- mutex_unlock(&item->odp_data->umem_mutex);
-}
-
-static void ib_umem_notifier_end_account(struct ib_umem *item)
-{
- mutex_lock(&item->odp_data->umem_mutex);
-
- /* Only update private counters for this umem if it has them.
- * Otherwise skip it. All page faults will be delayed for this umem. */
- if (item->odp_data->mn_counters_active) {
+ mutex_lock(&umem_odp->umem_mutex);
+ if (umem_odp->notifiers_count++ == 0)
/*
- * This sequence increase will notify the QP page fault that
- * the page that is going to be mapped in the spte could have
- * been freed.
+ * Initialize the completion object for waiting on
+ * notifiers. Since notifier_count is zero, no one should be
+ * waiting right now.
*/
- ++item->odp_data->notifiers_seq;
- if (--item->odp_data->notifiers_count == 0)
- complete_all(&item->odp_data->notifier_completion);
- }
- mutex_unlock(&item->odp_data->umem_mutex);
+ reinit_completion(&umem_odp->notifier_completion);
+ mutex_unlock(&umem_odp->umem_mutex);
}
-/* Account for a new mmu notifier in an ib_ucontext. */
-static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
+static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
{
- atomic_inc(&context->notifier_count);
+ mutex_lock(&umem_odp->umem_mutex);
+ /*
+ * This sequence increase will notify the QP page fault that the page
+ * that is going to be mapped in the spte could have been freed.
+ */
+ ++umem_odp->notifiers_seq;
+ if (--umem_odp->notifiers_count == 0)
+ complete_all(&umem_odp->notifier_completion);
+ mutex_unlock(&umem_odp->umem_mutex);
}
-/* Account for a terminating mmu notifier in an ib_ucontext.
- *
- * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
- * the function takes the semaphore itself. */
-static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
+static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
+ u64 start, u64 end, void *cookie)
{
- int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
-
- if (zero_notifiers &&
- !list_empty(&context->no_private_counters)) {
- /* No currently running mmu notifiers. Now is the chance to
- * add private accounting to all previously added umems. */
- struct ib_umem_odp *odp_data, *next;
-
- /* Prevent concurrent mmu notifiers from working on the
- * no_private_counters list. */
- down_write(&context->umem_rwsem);
-
- /* Read the notifier_count again, with the umem_rwsem
- * semaphore taken for write. */
- if (!atomic_read(&context->notifier_count)) {
- list_for_each_entry_safe(odp_data, next,
- &context->no_private_counters,
- no_private_counters) {
- mutex_lock(&odp_data->umem_mutex);
- odp_data->mn_counters_active = true;
- list_del(&odp_data->no_private_counters);
- complete_all(&odp_data->notifier_completion);
- mutex_unlock(&odp_data->umem_mutex);
- }
- }
-
- up_write(&context->umem_rwsem);
- }
-}
+ struct ib_umem *umem = &umem_odp->umem;
-static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie) {
/*
* Increase the number of notifiers running, to
* prevent any further fault handling on this MR.
*/
- ib_umem_notifier_start_account(item);
- item->odp_data->dying = 1;
+ ib_umem_notifier_start_account(umem_odp);
+ umem_odp->dying = 1;
/* Make sure that the fact the umem is dying is out before we release
* all pending page faults. */
smp_wmb();
- complete_all(&item->odp_data->notifier_completion);
- item->context->invalidate_range(item, ib_umem_start(item),
- ib_umem_end(item));
+ complete_all(&umem_odp->notifier_completion);
+ umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
+ ib_umem_end(umem));
return 0;
}
static void ib_umem_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
- struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
-
- if (!context->invalidate_range)
- return;
-
- ib_ucontext_notifier_start_account(context);
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
- ULLONG_MAX,
- ib_umem_notifier_release_trampoline,
- true,
- NULL);
- up_read(&context->umem_rwsem);
+ struct ib_ucontext_per_mm *per_mm =
+ container_of(mn, struct ib_ucontext_per_mm, mn);
+
+ down_read(&per_mm->umem_rwsem);
+ if (per_mm->active)
+ rbt_ib_umem_for_each_in_range(
+ &per_mm->umem_tree, 0, ULLONG_MAX,
+ ib_umem_notifier_release_trampoline, true, NULL);
+ up_read(&per_mm->umem_rwsem);
}
-static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
+static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie)
{
ib_umem_notifier_start_account(item);
- item->context->invalidate_range(item, start, start + PAGE_SIZE);
+ item->umem.context->invalidate_range(item, start, start + PAGE_SIZE);
ib_umem_notifier_end_account(item);
return 0;
}
-static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie)
+static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
+ u64 start, u64 end, void *cookie)
{
ib_umem_notifier_start_account(item);
- item->context->invalidate_range(item, start, end);
+ item->umem.context->invalidate_range(item, start, end);
return 0;
}
@@ -214,28 +160,30 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
unsigned long end,
bool blockable)
{
- struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
- int ret;
-
- if (!context->invalidate_range)
- return 0;
+ struct ib_ucontext_per_mm *per_mm =
+ container_of(mn, struct ib_ucontext_per_mm, mn);
if (blockable)
- down_read(&context->umem_rwsem);
- else if (!down_read_trylock(&context->umem_rwsem))
+ down_read(&per_mm->umem_rwsem);
+ else if (!down_read_trylock(&per_mm->umem_rwsem))
return -EAGAIN;
- ib_ucontext_notifier_start_account(context);
- ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
- end,
- invalidate_range_start_trampoline,
- blockable, NULL);
- up_read(&context->umem_rwsem);
+ if (!per_mm->active) {
+ up_read(&per_mm->umem_rwsem);
+ /*
+ * At this point active is permanently set and visible to this
+ * CPU without a lock, that fact is relied on to skip the unlock
+ * in range_end.
+ */
+ return 0;
+ }
- return ret;
+ return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
+ invalidate_range_start_trampoline,
+ blockable, NULL);
}
-static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
+static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie)
{
ib_umem_notifier_end_account(item);
@@ -247,22 +195,16 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
- struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+ struct ib_ucontext_per_mm *per_mm =
+ container_of(mn, struct ib_ucontext_per_mm, mn);
- if (!context->invalidate_range)
+ if (unlikely(!per_mm->active))
return;
- /*
- * TODO: we currently bail out if there is any sleepable work to be done
- * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
- * here. But this is ugly and fragile.
- */
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+ rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
end,
invalidate_range_end_trampoline, true, NULL);
- up_read(&context->umem_rwsem);
- ib_ucontext_notifier_end_account(context);
+ up_read(&per_mm->umem_rwsem);
}
static const struct mmu_notifier_ops ib_umem_notifiers = {
@@ -271,31 +213,158 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size)
+static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
{
- struct ib_umem *umem;
+ struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+ struct ib_umem *umem = &umem_odp->umem;
+
+ down_write(&per_mm->umem_rwsem);
+ if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+ rbt_ib_umem_insert(&umem_odp->interval_tree,
+ &per_mm->umem_tree);
+ up_write(&per_mm->umem_rwsem);
+}
+
+static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
+{
+ struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+ struct ib_umem *umem = &umem_odp->umem;
+
+ down_write(&per_mm->umem_rwsem);
+ if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+ rbt_ib_umem_remove(&umem_odp->interval_tree,
+ &per_mm->umem_tree);
+ complete_all(&umem_odp->notifier_completion);
+
+ up_write(&per_mm->umem_rwsem);
+}
+
+static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
+ struct mm_struct *mm)
+{
+ struct ib_ucontext_per_mm *per_mm;
+ int ret;
+
+ per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
+ if (!per_mm)
+ return ERR_PTR(-ENOMEM);
+
+ per_mm->context = ctx;
+ per_mm->mm = mm;
+ per_mm->umem_tree = RB_ROOT_CACHED;
+ init_rwsem(&per_mm->umem_rwsem);
+ per_mm->active = ctx->invalidate_range;
+
+ rcu_read_lock();
+ per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ rcu_read_unlock();
+
+ WARN_ON(mm != current->mm);
+
+ per_mm->mn.ops = &ib_umem_notifiers;
+ ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
+ if (ret) {
+ dev_err(&ctx->device->dev,
+ "Failed to register mmu_notifier %d\n", ret);
+ goto out_pid;
+ }
+
+ list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
+ return per_mm;
+
+out_pid:
+ put_pid(per_mm->tgid);
+ kfree(per_mm);
+ return ERR_PTR(ret);
+}
+
+static int get_per_mm(struct ib_umem_odp *umem_odp)
+{
+ struct ib_ucontext *ctx = umem_odp->umem.context;
+ struct ib_ucontext_per_mm *per_mm;
+
+ /*
+ * Generally speaking we expect only one or two per_mm in this list,
+ * so no reason to optimize this search today.
+ */
+ mutex_lock(&ctx->per_mm_list_lock);
+ list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
+ if (per_mm->mm == umem_odp->umem.owning_mm)
+ goto found;
+ }
+
+ per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
+ if (IS_ERR(per_mm)) {
+ mutex_unlock(&ctx->per_mm_list_lock);
+ return PTR_ERR(per_mm);
+ }
+
+found:
+ umem_odp->per_mm = per_mm;
+ per_mm->odp_mrs_count++;
+ mutex_unlock(&ctx->per_mm_list_lock);
+
+ return 0;
+}
+
+static void free_per_mm(struct rcu_head *rcu)
+{
+ kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
+}
+
+void put_per_mm(struct ib_umem_odp *umem_odp)
+{
+ struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+ struct ib_ucontext *ctx = umem_odp->umem.context;
+ bool need_free;
+
+ mutex_lock(&ctx->per_mm_list_lock);
+ umem_odp->per_mm = NULL;
+ per_mm->odp_mrs_count--;
+ need_free = per_mm->odp_mrs_count == 0;
+ if (need_free)
+ list_del(&per_mm->ucontext_list);
+ mutex_unlock(&ctx->per_mm_list_lock);
+
+ if (!need_free)
+ return;
+
+ /*
+ * NOTE! mmu_notifier_unregister() can happen between a start/end
+ * callback, resulting in an start/end, and thus an unbalanced
+ * lock. This doesn't really matter to us since we are about to kfree
+ * the memory that holds the lock, however LOCKDEP doesn't like this.
+ */
+ down_write(&per_mm->umem_rwsem);
+ per_mm->active = false;
+ up_write(&per_mm->umem_rwsem);
+
+ WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
+ mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
+ put_pid(per_mm->tgid);
+ mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
+}
+
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+ unsigned long addr, size_t size)
+{
+ struct ib_ucontext *ctx = per_mm->context;
struct ib_umem_odp *odp_data;
+ struct ib_umem *umem;
int pages = size >> PAGE_SHIFT;
int ret;
- umem = kzalloc(sizeof(*umem), GFP_KERNEL);
- if (!umem)
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data)
return ERR_PTR(-ENOMEM);
-
- umem->context = context;
+ umem = &odp_data->umem;
+ umem->context = ctx;
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
umem->writable = 1;
-
- odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
- if (!odp_data) {
- ret = -ENOMEM;
- goto out_umem;
- }
- odp_data->umem = umem;
+ umem->is_odp = 1;
+ odp_data->per_mm = per_mm;
mutex_init(&odp_data->umem_mutex);
init_completion(&odp_data->notifier_completion);
@@ -314,39 +383,34 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
goto out_page_list;
}
- down_write(&context->umem_rwsem);
- context->odp_mrs_count++;
- rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
- if (likely(!atomic_read(&context->notifier_count)))
- odp_data->mn_counters_active = true;
- else
- list_add(&odp_data->no_private_counters,
- &context->no_private_counters);
- up_write(&context->umem_rwsem);
-
- umem->odp_data = odp_data;
+ /*
+ * Caller must ensure that the umem_odp that the per_mm came from
+ * cannot be freed during the call to ib_alloc_odp_umem.
+ */
+ mutex_lock(&ctx->per_mm_list_lock);
+ per_mm->odp_mrs_count++;
+ mutex_unlock(&ctx->per_mm_list_lock);
+ add_umem_to_per_mm(odp_data);
- return umem;
+ return odp_data;
out_page_list:
vfree(odp_data->page_list);
out_odp_data:
kfree(odp_data);
-out_umem:
- kfree(umem);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_alloc_odp_umem);
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
- int access)
+int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
{
+ struct ib_umem *umem = &umem_odp->umem;
+ /*
+ * NOTE: This must called in a process context where umem->owning_mm
+ * == current->mm
+ */
+ struct mm_struct *mm = umem->owning_mm;
int ret_val;
- struct pid *our_pid;
- struct mm_struct *mm = get_task_mm(current);
-
- if (!mm)
- return -EINVAL;
if (access & IB_ACCESS_HUGETLB) {
struct vm_area_struct *vma;
@@ -366,111 +430,43 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
umem->hugetlb = 0;
}
- /* Prevent creating ODP MRs in child processes */
- rcu_read_lock();
- our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
- rcu_read_unlock();
- put_pid(our_pid);
- if (context->tgid != our_pid) {
- ret_val = -EINVAL;
- goto out_mm;
- }
-
- umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
- if (!umem->odp_data) {
- ret_val = -ENOMEM;
- goto out_mm;
- }
- umem->odp_data->umem = umem;
-
- mutex_init(&umem->odp_data->umem_mutex);
+ mutex_init(&umem_odp->umem_mutex);
- init_completion(&umem->odp_data->notifier_completion);
+ init_completion(&umem_odp->notifier_completion);
if (ib_umem_num_pages(umem)) {
- umem->odp_data->page_list =
- vzalloc(array_size(sizeof(*umem->odp_data->page_list),
+ umem_odp->page_list =
+ vzalloc(array_size(sizeof(*umem_odp->page_list),
ib_umem_num_pages(umem)));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
+ if (!umem_odp->page_list)
+ return -ENOMEM;
- umem->odp_data->dma_list =
- vzalloc(array_size(sizeof(*umem->odp_data->dma_list),
+ umem_odp->dma_list =
+ vzalloc(array_size(sizeof(*umem_odp->dma_list),
ib_umem_num_pages(umem)));
- if (!umem->odp_data->dma_list) {
+ if (!umem_odp->dma_list) {
ret_val = -ENOMEM;
goto out_page_list;
}
}
- /*
- * When using MMU notifiers, we will get a
- * notification before the "current" task (and MM) is
- * destroyed. We use the umem_rwsem semaphore to synchronize.
- */
- down_write(&context->umem_rwsem);
- context->odp_mrs_count++;
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
- rbt_ib_umem_insert(&umem->odp_data->interval_tree,
- &context->umem_tree);
- if (likely(!atomic_read(&context->notifier_count)) ||
- context->odp_mrs_count == 1)
- umem->odp_data->mn_counters_active = true;
- else
- list_add(&umem->odp_data->no_private_counters,
- &context->no_private_counters);
- downgrade_write(&context->umem_rwsem);
-
- if (context->odp_mrs_count == 1) {
- /*
- * Note that at this point, no MMU notifier is running
- * for this context!
- */
- atomic_set(&context->notifier_count, 0);
- INIT_HLIST_NODE(&context->mn.hlist);
- context->mn.ops = &ib_umem_notifiers;
- /*
- * Lock-dep detects a false positive for mmap_sem vs.
- * umem_rwsem, due to not grasping downgrade_write correctly.
- */
- lockdep_off();
- ret_val = mmu_notifier_register(&context->mn, mm);
- lockdep_on();
- if (ret_val) {
- pr_err("Failed to register mmu_notifier %d\n", ret_val);
- ret_val = -EBUSY;
- goto out_mutex;
- }
- }
-
- up_read(&context->umem_rwsem);
+ ret_val = get_per_mm(umem_odp);
+ if (ret_val)
+ goto out_dma_list;
+ add_umem_to_per_mm(umem_odp);
- /*
- * Note that doing an mmput can cause a notifier for the relevant mm.
- * If the notifier is called while we hold the umem_rwsem, this will
- * cause a deadlock. Therefore, we release the reference only after we
- * released the semaphore.
- */
- mmput(mm);
return 0;
-out_mutex:
- up_read(&context->umem_rwsem);
- vfree(umem->odp_data->dma_list);
+out_dma_list:
+ vfree(umem_odp->dma_list);
out_page_list:
- vfree(umem->odp_data->page_list);
-out_odp_data:
- kfree(umem->odp_data);
-out_mm:
- mmput(mm);
+ vfree(umem_odp->page_list);
return ret_val;
}
-void ib_umem_odp_release(struct ib_umem *umem)
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
- struct ib_ucontext *context = umem->context;
+ struct ib_umem *umem = &umem_odp->umem;
/*
* Ensure that no more pages are mapped in the umem.
@@ -478,61 +474,13 @@ void ib_umem_odp_release(struct ib_umem *umem)
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
- ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
- down_write(&context->umem_rwsem);
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
- rbt_ib_umem_remove(&umem->odp_data->interval_tree,
- &context->umem_tree);
- context->odp_mrs_count--;
- if (!umem->odp_data->mn_counters_active) {
- list_del(&umem->odp_data->no_private_counters);
- complete_all(&umem->odp_data->notifier_completion);
- }
-
- /*
- * Downgrade the lock to a read lock. This ensures that the notifiers
- * (who lock the mutex for reading) will be able to finish, and we
- * will be able to enventually obtain the mmu notifiers SRCU. Note
- * that since we are doing it atomically, no other user could register
- * and unregister while we do the check.
- */
- downgrade_write(&context->umem_rwsem);
- if (!context->odp_mrs_count) {
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(context->tgid,
- PIDTYPE_PID);
- if (owning_process == NULL)
- /*
- * The process is already dead, notifier were removed
- * already.
- */
- goto out;
-
- owning_mm = get_task_mm(owning_process);
- if (owning_mm == NULL)
- /*
- * The process' mm is already dead, notifier were
- * removed already.
- */
- goto out_put_task;
- mmu_notifier_unregister(&context->mn, owning_mm);
-
- mmput(owning_mm);
-
-out_put_task:
- put_task_struct(owning_process);
- }
-out:
- up_read(&context->umem_rwsem);
-
- vfree(umem->odp_data->dma_list);
- vfree(umem->odp_data->page_list);
- kfree(umem->odp_data);
- kfree(umem);
+ remove_umem_from_per_mm(umem_odp);
+ put_per_mm(umem_odp);
+ vfree(umem_odp->dma_list);
+ vfree(umem_odp->page_list);
}
/*
@@ -544,7 +492,7 @@ out:
* @access_mask: access permissions needed for this page.
* @current_seq: sequence number for synchronization with invalidations.
* the sequence number is taken from
- * umem->odp_data->notifiers_seq.
+ * umem_odp->notifiers_seq.
*
* The function returns -EFAULT if the DMA mapping operation fails. It returns
* -EAGAIN if a concurrent invalidation prevents us from updating the page.
@@ -554,12 +502,13 @@ out:
* umem.
*/
static int ib_umem_odp_map_dma_single_page(
- struct ib_umem *umem,
+ struct ib_umem_odp *umem_odp,
int page_index,
struct page *page,
u64 access_mask,
unsigned long current_seq)
{
+ struct ib_umem *umem = &umem_odp->umem;
struct ib_device *dev = umem->context->device;
dma_addr_t dma_addr;
int stored_page = 0;
@@ -571,11 +520,11 @@ static int ib_umem_odp_map_dma_single_page(
* handle case of a racing notifier. This check also allows us to bail
* early if we have a notifier running in parallel with us.
*/
- if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
+ if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
ret = -EAGAIN;
goto out;
}
- if (!(umem->odp_data->dma_list[page_index])) {
+ if (!(umem_odp->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
0, BIT(umem->page_shift),
@@ -584,15 +533,15 @@ static int ib_umem_odp_map_dma_single_page(
ret = -EFAULT;
goto out;
}
- umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
- umem->odp_data->page_list[page_index] = page;
+ umem_odp->dma_list[page_index] = dma_addr | access_mask;
+ umem_odp->page_list[page_index] = page;
umem->npages++;
stored_page = 1;
- } else if (umem->odp_data->page_list[page_index] == page) {
- umem->odp_data->dma_list[page_index] |= access_mask;
+ } else if (umem_odp->page_list[page_index] == page) {
+ umem_odp->dma_list[page_index] |= access_mask;
} else {
pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem->odp_data->page_list[page_index], page);
+ umem_odp->page_list[page_index], page);
/* Better remove the mapping now, to prevent any further
* damage. */
remove_existing_mapping = 1;
@@ -605,7 +554,7 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
- umem,
+ umem_odp,
ib_umem_start(umem) + (page_index >> umem->page_shift),
ib_umem_start(umem) + ((page_index + 1) >>
umem->page_shift),
@@ -621,7 +570,7 @@ out:
*
* Pins the range of pages passed in the argument, and maps them to
* DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem->odp_data->dma_list.
+ * umem_odp->dma_list.
*
* Returns the number of pages mapped in success, negative error code
* for failure.
@@ -629,7 +578,7 @@ out:
* the function from completing its task.
* An -ENOENT error code indicates that userspace process is being terminated
* and mm was already destroyed.
- * @umem: the umem to map and pin
+ * @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
* bigger due to alignment, and may also be smaller in case of an error
@@ -639,13 +588,15 @@ out:
* range.
* @current_seq: the MMU notifiers sequance value for synchronization with
* invalidations. the sequance number is read from
- * umem->odp_data->notifiers_seq before calling this function
+ * umem_odp->notifiers_seq before calling this function
*/
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
- u64 access_mask, unsigned long current_seq)
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
+ u64 bcnt, u64 access_mask,
+ unsigned long current_seq)
{
+ struct ib_umem *umem = &umem_odp->umem;
struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
+ struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
struct page **local_page_list = NULL;
u64 page_mask, off;
int j, k, ret = 0, start_idx, npages = 0, page_shift;
@@ -669,15 +620,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */
- owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
- if (owning_process == NULL) {
+ /*
+ * owning_process is allowed to be NULL, this means somehow the mm is
+ * existing beyond the lifetime of the originating process.. Presumably
+ * mmget_not_zero will fail in this case.
+ */
+ owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
+ if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) {
ret = -EINVAL;
- goto out_no_task;
- }
-
- owning_mm = get_task_mm(owning_process);
- if (owning_mm == NULL) {
- ret = -ENOENT;
goto out_put_task;
}
@@ -709,7 +659,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- mutex_lock(&umem->odp_data->umem_mutex);
+ mutex_lock(&umem_odp->umem_mutex);
for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
if (user_virt & ~page_mask) {
p += PAGE_SIZE;
@@ -722,7 +672,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
}
ret = ib_umem_odp_map_dma_single_page(
- umem, k, local_page_list[j],
+ umem_odp, k, local_page_list[j],
access_mask, current_seq);
if (ret < 0)
break;
@@ -730,7 +680,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
p = page_to_phys(local_page_list[j]);
k++;
}
- mutex_unlock(&umem->odp_data->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
if (ret < 0) {
/* Release left over pages when handling errors. */
@@ -749,16 +699,17 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
mmput(owning_mm);
out_put_task:
- put_task_struct(owning_process);
-out_no_task:
+ if (owning_process)
+ put_task_struct(owning_process);
free_page((unsigned long)local_page_list);
return ret;
}
EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
+ struct ib_umem *umem = &umem_odp->umem;
int idx;
u64 addr;
struct ib_device *dev = umem->context->device;
@@ -770,12 +721,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
- mutex_lock(&umem->odp_data->umem_mutex);
+ mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
- if (umem->odp_data->page_list[idx]) {
- struct page *page = umem->odp_data->page_list[idx];
- dma_addr_t dma = umem->odp_data->dma_list[idx];
+ if (umem_odp->page_list[idx]) {
+ struct page *page = umem_odp->page_list[idx];
+ dma_addr_t dma = umem_odp->dma_list[idx];
dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
WARN_ON(!dma_addr);
@@ -798,12 +749,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
/* on demand pinning support */
if (!umem->context->invalidate_range)
put_page(page);
- umem->odp_data->page_list[idx] = NULL;
- umem->odp_data->dma_list[idx] = 0;
+ umem_odp->page_list[idx] = NULL;
+ umem_odp->dma_list[idx] = 0;
umem->npages--;
}
}
- mutex_unlock(&umem->odp_data->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
@@ -830,7 +781,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
- ret_val = cb(umem->umem, start, last, cookie) || ret_val;
+ ret_val = cb(umem, start, last, cookie) || ret_val;
}
return ret_val;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c34a6852d691..f55f48f6b272 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -138,7 +138,7 @@ static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
static dev_t dynamic_umad_dev;
static dev_t dynamic_issm_dev;
-static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
+static DEFINE_IDA(umad_ida);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
@@ -1132,7 +1132,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%s\n", port->ib_dev->name);
+ return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -1159,11 +1159,10 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
dev_t base_umad;
dev_t base_issm;
- devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (devnum >= IB_UMAD_MAX_PORTS)
+ devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
+ if (devnum < 0)
return -1;
port->dev_num = devnum;
- set_bit(devnum, dev_map);
if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
@@ -1227,7 +1226,7 @@ err_dev:
err_cdev:
cdev_del(&port->cdev);
- clear_bit(devnum, dev_map);
+ ida_free(&umad_ida, devnum);
return -1;
}
@@ -1261,7 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
- clear_bit(port->dev_num, dev_map);
+ ida_free(&umad_ida, port->dev_num);
}
static void ib_umad_add_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5df8e548cc14..c97935a0c7c6 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -100,13 +100,14 @@ struct ib_uverbs_device {
atomic_t refcount;
int num_comp_vectors;
struct completion comp;
- struct device *dev;
+ struct device dev;
+ /* First group for device attributes, NULL terminated array */
+ const struct attribute_group *groups[2];
struct ib_device __rcu *ib_dev;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
- struct kobject kobj;
struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
@@ -146,7 +147,6 @@ struct ib_uverbs_file {
struct ib_event_handler event_handler;
struct ib_uverbs_async_event_file *async_file;
struct list_head list;
- int is_closed;
/*
* To access the uobjects list hw_destroy_rwsem must be held for write
@@ -158,6 +158,9 @@ struct ib_uverbs_file {
spinlock_t uobjects_lock;
struct list_head uobjects;
+ struct mutex umap_lock;
+ struct list_head umaps;
+
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
@@ -218,12 +221,6 @@ struct ib_ucq_object {
u32 async_events_reported;
};
-struct ib_uflow_resources;
-struct ib_uflow_object {
- struct ib_uobject uobject;
- struct ib_uflow_resources *resources;
-};
-
extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a21d5214afc3..a93853770e3c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -117,18 +117,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
/* ufile is required when some objects are released */
ucontext->ufile = file;
- rcu_read_lock();
- ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
- rcu_read_unlock();
- ucontext->closing = 0;
+ ucontext->closing = false;
ucontext->cleanup_retryable = false;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- ucontext->umem_tree = RB_ROOT_CACHED;
- init_rwsem(&ucontext->umem_rwsem);
- ucontext->odp_mrs_count = 0;
- INIT_LIST_HEAD(&ucontext->no_private_counters);
-
+ mutex_init(&ucontext->per_mm_list_lock);
+ INIT_LIST_HEAD(&ucontext->per_mm_list);
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL;
@@ -172,7 +166,6 @@ err_fd:
put_unused_fd(resp.async_fd);
err_free:
- put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext);
err_alloc:
@@ -2027,33 +2020,55 @@ static int modify_qp(struct ib_uverbs_file *file,
if ((cmd->base.attr_mask & IB_QP_CUR_STATE &&
cmd->base.cur_qp_state > IB_QPS_ERR) ||
- cmd->base.qp_state > IB_QPS_ERR) {
+ (cmd->base.attr_mask & IB_QP_STATE &&
+ cmd->base.qp_state > IB_QPS_ERR)) {
ret = -EINVAL;
goto release_qp;
}
- attr->qp_state = cmd->base.qp_state;
- attr->cur_qp_state = cmd->base.cur_qp_state;
- attr->path_mtu = cmd->base.path_mtu;
- attr->path_mig_state = cmd->base.path_mig_state;
- attr->qkey = cmd->base.qkey;
- attr->rq_psn = cmd->base.rq_psn;
- attr->sq_psn = cmd->base.sq_psn;
- attr->dest_qp_num = cmd->base.dest_qp_num;
- attr->qp_access_flags = cmd->base.qp_access_flags;
- attr->pkey_index = cmd->base.pkey_index;
- attr->alt_pkey_index = cmd->base.alt_pkey_index;
- attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
- attr->max_rd_atomic = cmd->base.max_rd_atomic;
- attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
- attr->min_rnr_timer = cmd->base.min_rnr_timer;
- attr->port_num = cmd->base.port_num;
- attr->timeout = cmd->base.timeout;
- attr->retry_cnt = cmd->base.retry_cnt;
- attr->rnr_retry = cmd->base.rnr_retry;
- attr->alt_port_num = cmd->base.alt_port_num;
- attr->alt_timeout = cmd->base.alt_timeout;
- attr->rate_limit = cmd->rate_limit;
+ if (cmd->base.attr_mask & IB_QP_STATE)
+ attr->qp_state = cmd->base.qp_state;
+ if (cmd->base.attr_mask & IB_QP_CUR_STATE)
+ attr->cur_qp_state = cmd->base.cur_qp_state;
+ if (cmd->base.attr_mask & IB_QP_PATH_MTU)
+ attr->path_mtu = cmd->base.path_mtu;
+ if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
+ attr->path_mig_state = cmd->base.path_mig_state;
+ if (cmd->base.attr_mask & IB_QP_QKEY)
+ attr->qkey = cmd->base.qkey;
+ if (cmd->base.attr_mask & IB_QP_RQ_PSN)
+ attr->rq_psn = cmd->base.rq_psn;
+ if (cmd->base.attr_mask & IB_QP_SQ_PSN)
+ attr->sq_psn = cmd->base.sq_psn;
+ if (cmd->base.attr_mask & IB_QP_DEST_QPN)
+ attr->dest_qp_num = cmd->base.dest_qp_num;
+ if (cmd->base.attr_mask & IB_QP_ACCESS_FLAGS)
+ attr->qp_access_flags = cmd->base.qp_access_flags;
+ if (cmd->base.attr_mask & IB_QP_PKEY_INDEX)
+ attr->pkey_index = cmd->base.pkey_index;
+ if (cmd->base.attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
+ attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+ if (cmd->base.attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ attr->max_rd_atomic = cmd->base.max_rd_atomic;
+ if (cmd->base.attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
+ if (cmd->base.attr_mask & IB_QP_MIN_RNR_TIMER)
+ attr->min_rnr_timer = cmd->base.min_rnr_timer;
+ if (cmd->base.attr_mask & IB_QP_PORT)
+ attr->port_num = cmd->base.port_num;
+ if (cmd->base.attr_mask & IB_QP_TIMEOUT)
+ attr->timeout = cmd->base.timeout;
+ if (cmd->base.attr_mask & IB_QP_RETRY_CNT)
+ attr->retry_cnt = cmd->base.retry_cnt;
+ if (cmd->base.attr_mask & IB_QP_RNR_RETRY)
+ attr->rnr_retry = cmd->base.rnr_retry;
+ if (cmd->base.attr_mask & IB_QP_ALT_PATH) {
+ attr->alt_port_num = cmd->base.alt_port_num;
+ attr->alt_timeout = cmd->base.alt_timeout;
+ attr->alt_pkey_index = cmd->base.alt_pkey_index;
+ }
+ if (cmd->base.attr_mask & IB_QP_RATE_LIMIT)
+ attr->rate_limit = cmd->rate_limit;
if (cmd->base.attr_mask & IB_QP_AV)
copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr,
@@ -2747,16 +2762,7 @@ out_put:
return ret ? ret : in_len;
}
-struct ib_uflow_resources {
- size_t max;
- size_t num;
- size_t collection_num;
- size_t counters_num;
- struct ib_counters **counters;
- struct ib_flow_action **collection;
-};
-
-static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
{
struct ib_uflow_resources *resources;
@@ -2786,6 +2792,7 @@ err:
return NULL;
}
+EXPORT_SYMBOL(flow_resources_alloc);
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
{
@@ -2804,10 +2811,11 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
kfree(uflow_res->counters);
kfree(uflow_res);
}
+EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
-static void flow_resources_add(struct ib_uflow_resources *uflow_res,
- enum ib_flow_spec_type type,
- void *ibobj)
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ enum ib_flow_spec_type type,
+ void *ibobj)
{
WARN_ON(uflow_res->num >= uflow_res->max);
@@ -2828,6 +2836,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
uflow_res->num++;
}
+EXPORT_SYMBOL(flow_resources_add);
static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
struct ib_uverbs_flow_spec *kern_spec,
@@ -3462,7 +3471,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj;
- struct ib_uflow_object *uflow;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
@@ -3601,13 +3609,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = PTR_ERR(flow_id);
goto err_free;
}
- atomic_inc(&qp->usecnt);
- flow_id->qp = qp;
- flow_id->device = qp->device;
- flow_id->uobject = uobj;
- uobj->object = flow_id;
- uflow = container_of(uobj, typeof(*uflow), uobject);
- uflow->resources = uflow_res;
+
+ ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 1a6b229e3db3..b0e493e8d860 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -57,6 +57,7 @@ struct bundle_priv {
struct ib_uverbs_attr *uattrs;
DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
+ DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
/*
* Must be last. bundle ends in a flex array which overlaps
@@ -143,6 +144,86 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
0, uattr->len - len);
}
+static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
+ const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ struct ib_uverbs_attr *uattr,
+ u32 attr_bkey)
+{
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ size_t array_len;
+ u32 *idr_vals;
+ int ret = 0;
+ size_t i;
+
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
+ if (uattr->len % sizeof(u32))
+ return -EINVAL;
+
+ array_len = uattr->len / sizeof(u32);
+ if (array_len < spec->u2.objs_arr.min_len ||
+ array_len > spec->u2.objs_arr.max_len)
+ return -EINVAL;
+
+ attr->uobjects =
+ uverbs_alloc(&pbundle->bundle,
+ array_size(array_len, sizeof(*attr->uobjects)));
+ if (IS_ERR(attr->uobjects))
+ return PTR_ERR(attr->uobjects);
+
+ /*
+ * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
+ * to store idrs array and avoid additional memory allocation. The
+ * idrs array is offset to the end of the uobjects array so we will be
+ * able to read idr and replace with a pointer.
+ */
+ idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
+
+ if (uattr->len > sizeof(uattr->data)) {
+ ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
+ uattr->len);
+ if (ret)
+ return -EFAULT;
+ } else {
+ memcpy(idr_vals, &uattr->data, uattr->len);
+ }
+
+ for (i = 0; i != array_len; i++) {
+ attr->uobjects[i] = uverbs_get_uobject_from_file(
+ spec->u2.objs_arr.obj_type, pbundle->bundle.ufile,
+ spec->u2.objs_arr.access, idr_vals[i]);
+ if (IS_ERR(attr->uobjects[i])) {
+ ret = PTR_ERR(attr->uobjects[i]);
+ break;
+ }
+ }
+
+ attr->len = i;
+ __set_bit(attr_bkey, pbundle->spec_finalize);
+ return ret;
+}
+
+static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ bool commit)
+{
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ int current_ret;
+ int ret = 0;
+ size_t i;
+
+ for (i = 0; i != attr->len; i++) {
+ current_ret = uverbs_finalize_object(
+ attr->uobjects[i], spec->u2.objs_arr.access, commit);
+ if (!ret)
+ ret = current_ret;
+ }
+
+ return ret;
+}
+
static int uverbs_process_attr(struct bundle_priv *pbundle,
const struct uverbs_api_attr *attr_uapi,
struct ib_uverbs_attr *uattr, u32 attr_bkey)
@@ -246,6 +327,11 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
}
break;
+
+ case UVERBS_ATTR_TYPE_IDRS_ARRAY:
+ return uverbs_process_idrs_array(pbundle, attr_uapi,
+ &e->objs_arr_attr, uattr,
+ attr_bkey);
default:
return -EOPNOTSUPP;
}
@@ -300,8 +386,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
return -EPROTONOSUPPORT;
return 0;
}
- attr = srcu_dereference(
- *slot, &pbundle->bundle.ufile->device->disassociate_srcu);
+ attr = rcu_dereference_protected(*slot, true);
/* Reject duplicate attributes from user-space */
if (test_bit(attr_bkey, pbundle->bundle.attr_present))
@@ -384,6 +469,7 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
unsigned int i;
int ret = 0;
+ /* fast path for simple uobjects */
i = -1;
while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) {
@@ -397,6 +483,30 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
ret = current_ret;
}
+ i = -1;
+ while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
+ i + 1)) < key_bitmap_len) {
+ struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+ const struct uverbs_api_attr *attr_uapi;
+ void __rcu **slot;
+ int current_ret;
+
+ slot = uapi_get_attr_for_method(
+ pbundle,
+ pbundle->method_key | uapi_bkey_to_key_attr(i));
+ if (WARN_ON(!slot))
+ continue;
+
+ attr_uapi = rcu_dereference_protected(*slot, true);
+
+ if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+ current_ret = uverbs_free_idrs_array(
+ attr_uapi, &attr->objs_arr_attr, commit);
+ if (!ret)
+ ret = current_ret;
+ }
+ }
+
for (memblock = pbundle->allocated_mem; memblock;) {
struct bundle_alloc_head *tmp = memblock;
@@ -429,7 +539,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
uapi_key_ioctl_method(hdr->method_id));
if (unlikely(!slot))
return -EPROTONOSUPPORT;
- method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
+ method_elm = rcu_dereference_protected(*slot, true);
if (!method_elm->use_stack) {
pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
@@ -461,6 +571,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
memset(pbundle->bundle.attr_present, 0,
sizeof(pbundle->bundle.attr_present));
memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
+ memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
destroy_ret = bundle_destroy(pbundle, ret == 0);
@@ -611,3 +722,26 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
return 0;
}
EXPORT_SYMBOL(uverbs_copy_to);
+
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ const struct uverbs_attr *attr;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr)) {
+ if ((PTR_ERR(attr) != -ENOENT) || !def_val)
+ return PTR_ERR(attr);
+
+ *to = *def_val;
+ } else {
+ *to = attr->ptr_attr.data;
+ }
+
+ if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(_uverbs_get_const);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 823beca448e1..6d373f5515b7 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -45,6 +45,7 @@
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
+#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@@ -72,7 +73,7 @@ enum {
static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
-static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
+static DEFINE_IDA(uverbs_ida);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
@@ -169,20 +170,16 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
return ret;
}
-static void ib_uverbs_release_dev(struct kobject *kobj)
+static void ib_uverbs_release_dev(struct device *device)
{
struct ib_uverbs_device *dev =
- container_of(kobj, struct ib_uverbs_device, kobj);
+ container_of(device, struct ib_uverbs_device, dev);
uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu);
kfree(dev);
}
-static struct kobj_type ib_uverbs_dev_ktype = {
- .release = ib_uverbs_release_dev,
-};
-
static void ib_uverbs_release_async_event_file(struct kref *ref)
{
struct ib_uverbs_async_event_file *file =
@@ -265,7 +262,7 @@ void ib_uverbs_release_file(struct kref *ref)
if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
- kobject_put(&file->device->kobj);
+ put_device(&file->device->dev);
kfree(file);
}
@@ -440,6 +437,7 @@ static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
list_del(&entry->obj_list);
kfree(entry);
}
+ file->ev_queue.is_closed = 1;
spin_unlock_irq(&file->ev_queue.lock);
uverbs_close_fd(filp);
@@ -816,6 +814,226 @@ out:
}
/*
+ * Each time we map IO memory into user space this keeps track of the mapping.
+ * When the device is hot-unplugged we 'zap' the mmaps in user space to point
+ * to the zero page and allow the hot unplug to proceed.
+ *
+ * This is necessary for cases like PCI physical hot unplug as the actual BAR
+ * memory may vanish after this and access to it from userspace could MCE.
+ *
+ * RDMA drivers supporting disassociation must have their user space designed
+ * to cope in some way with their IO pages going to the zero page.
+ */
+struct rdma_umap_priv {
+ struct vm_area_struct *vma;
+ struct list_head list;
+};
+
+static const struct vm_operations_struct rdma_umap_ops;
+
+static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+ priv->vma = vma;
+ vma->vm_private_data = priv;
+ vma->vm_ops = &rdma_umap_ops;
+
+ mutex_lock(&ufile->umap_lock);
+ list_add(&priv->list, &ufile->umaps);
+ mutex_unlock(&ufile->umap_lock);
+}
+
+/*
+ * The VMA has been dup'd, initialize the vm_private_data with a new tracking
+ * struct
+ */
+static void rdma_umap_open(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *opriv = vma->vm_private_data;
+ struct rdma_umap_priv *priv;
+
+ if (!opriv)
+ return;
+
+ /* We are racing with disassociation */
+ if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+ goto out_zap;
+ /*
+ * Disassociation already completed, the VMA should already be zapped.
+ */
+ if (!ufile->ucontext)
+ goto out_unlock;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ goto out_unlock;
+ rdma_umap_priv_init(priv, vma);
+
+ up_read(&ufile->hw_destroy_rwsem);
+ return;
+
+out_unlock:
+ up_read(&ufile->hw_destroy_rwsem);
+out_zap:
+ /*
+ * We can't allow the VMA to be created with the actual IO pages, that
+ * would break our API contract, and it can't be stopped at this
+ * point, so zap it.
+ */
+ vma->vm_private_data = NULL;
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void rdma_umap_close(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *priv = vma->vm_private_data;
+
+ if (!priv)
+ return;
+
+ /*
+ * The vma holds a reference on the struct file that created it, which
+ * in turn means that the ib_uverbs_file is guaranteed to exist at
+ * this point.
+ */
+ mutex_lock(&ufile->umap_lock);
+ list_del(&priv->list);
+ mutex_unlock(&ufile->umap_lock);
+ kfree(priv);
+}
+
+static const struct vm_operations_struct rdma_umap_ops = {
+ .open = rdma_umap_open,
+ .close = rdma_umap_close,
+};
+
+static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma,
+ unsigned long size)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ struct rdma_umap_priv *priv;
+
+ if (vma->vm_end - vma->vm_start != size)
+ return ERR_PTR(-EINVAL);
+
+ /* Driver is using this wrong, must be called by ib_uverbs_mmap */
+ if (WARN_ON(!vma->vm_file ||
+ vma->vm_file->private_data != ufile))
+ return ERR_PTR(-EINVAL);
+ lockdep_assert_held(&ufile->device->disassociate_srcu);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+ return priv;
+}
+
+/*
+ * Map IO memory into a process. This is to be called by drivers as part of
+ * their mmap() functions if they wish to send something like PCI-E BAR memory
+ * to userspace.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
+
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
+
+ vma->vm_page_prot = prot;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+ kfree(priv);
+ return -EAGAIN;
+ }
+
+ rdma_umap_priv_init(priv, vma);
+ return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/*
+ * The page case is here for a slightly different reason, the driver expects
+ * to be able to free the page it is sharing to user space when it destroys
+ * its ucontext, which means we need to zap the user space references.
+ *
+ * We could handle this differently by providing an API to allocate a shared
+ * page and then only freeing the shared page when the last ufile is
+ * destroyed.
+ */
+int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma, struct page *page,
+ unsigned long size)
+{
+ struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
+
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
+
+ if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
+ vma->vm_page_prot)) {
+ kfree(priv);
+ return -EAGAIN;
+ }
+
+ rdma_umap_priv_init(priv, vma);
+ return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_page);
+
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
+{
+ struct rdma_umap_priv *priv, *next_priv;
+
+ lockdep_assert_held(&ufile->hw_destroy_rwsem);
+
+ while (1) {
+ struct mm_struct *mm = NULL;
+
+ /* Get an arbitrary mm pointer that hasn't been cleaned yet */
+ mutex_lock(&ufile->umap_lock);
+ if (!list_empty(&ufile->umaps)) {
+ mm = list_first_entry(&ufile->umaps,
+ struct rdma_umap_priv, list)
+ ->vma->vm_mm;
+ mmget(mm);
+ }
+ mutex_unlock(&ufile->umap_lock);
+ if (!mm)
+ return;
+
+ /*
+ * The umap_lock is nested under mmap_sem since it used within
+ * the vma_ops callbacks, so we have to clean the list one mm
+ * at a time to get the lock ordering right. Typically there
+ * will only be one mm, so no big deal.
+ */
+ down_write(&mm->mmap_sem);
+ mutex_lock(&ufile->umap_lock);
+ list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
+ list) {
+ struct vm_area_struct *vma = priv->vma;
+
+ if (vma->vm_mm != mm)
+ continue;
+ list_del_init(&priv->list);
+
+ zap_vma_ptes(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
+ vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
+ }
+ mutex_unlock(&ufile->umap_lock);
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ }
+}
+
+/*
* ib_uverbs_open() does not need the BKL:
*
* - the ib_uverbs_device structures are properly reference counted and
@@ -838,6 +1056,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
if (!atomic_inc_not_zero(&dev->refcount))
return -ENXIO;
+ get_device(&dev->dev);
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
mutex_lock(&dev->lists_mutex);
ib_dev = srcu_dereference(dev->ib_dev,
@@ -875,9 +1094,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
spin_lock_init(&file->uobjects_lock);
INIT_LIST_HEAD(&file->uobjects);
init_rwsem(&file->hw_destroy_rwsem);
+ mutex_init(&file->umap_lock);
+ INIT_LIST_HEAD(&file->umaps);
filp->private_data = file;
- kobject_get(&dev->kobj);
list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
@@ -898,6 +1118,7 @@ err:
if (atomic_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
+ put_device(&dev->dev);
return ret;
}
@@ -908,10 +1129,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
mutex_lock(&file->device->lists_mutex);
- if (!file->is_closed) {
- list_del(&file->list);
- file->is_closed = 1;
- }
+ list_del_init(&file->list);
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
@@ -950,37 +1168,34 @@ static struct ib_client uverbs_client = {
.remove = ib_uverbs_remove_one
};
-static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
+static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
struct ib_device *ib_dev;
- if (!dev)
- return -ENODEV;
-
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%s\n", ib_dev->name);
+ ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR_RO(ibdev);
-static ssize_t show_dev_abi_version(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t abi_version_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
struct ib_device *ib_dev;
- if (!dev)
- return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
@@ -989,7 +1204,17 @@ static ssize_t show_dev_abi_version(struct device *device,
return ret;
}
-static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
+static DEVICE_ATTR_RO(abi_version);
+
+static struct attribute *ib_dev_attrs[] = {
+ &dev_attr_abi_version.attr,
+ &dev_attr_ibdev.attr,
+ NULL,
+};
+
+static const struct attribute_group dev_attr_group = {
+ .attrs = ib_dev_attrs,
+};
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
@@ -1027,66 +1252,56 @@ static void ib_uverbs_add_one(struct ib_device *device)
return;
}
+ device_initialize(&uverbs_dev->dev);
+ uverbs_dev->dev.class = uverbs_class;
+ uverbs_dev->dev.parent = device->dev.parent;
+ uverbs_dev->dev.release = ib_uverbs_release_dev;
+ uverbs_dev->groups[0] = &dev_attr_group;
+ uverbs_dev->dev.groups = uverbs_dev->groups;
atomic_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
- kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
+ rcu_assign_pointer(uverbs_dev->ib_dev, device);
+ uverbs_dev->num_comp_vectors = device->num_comp_vectors;
- devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (devnum >= IB_UVERBS_MAX_DEVICES)
+ devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
+ GFP_KERNEL);
+ if (devnum < 0)
goto err;
uverbs_dev->devnum = devnum;
- set_bit(devnum, dev_map);
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
else
base = IB_UVERBS_BASE_DEV + devnum;
- rcu_assign_pointer(uverbs_dev->ib_dev, device);
- uverbs_dev->num_comp_vectors = device->num_comp_vectors;
-
if (ib_uverbs_create_uapi(device, uverbs_dev))
- goto err;
+ goto err_uapi;
+
+ uverbs_dev->dev.devt = base;
+ dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
- cdev_init(&uverbs_dev->cdev, NULL);
+ cdev_init(&uverbs_dev->cdev,
+ device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
uverbs_dev->cdev.owner = THIS_MODULE;
- uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
- kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(&uverbs_dev->cdev, base, 1))
- goto err_cdev;
-
- uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
- uverbs_dev->cdev.dev, uverbs_dev,
- "uverbs%d", uverbs_dev->devnum);
- if (IS_ERR(uverbs_dev->dev))
- goto err_cdev;
-
- if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
- goto err_class;
- if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
- goto err_class;
- ib_set_client_data(device, &uverbs_client, uverbs_dev);
+ ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
+ if (ret)
+ goto err_uapi;
+ ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
-err_class:
- device_destroy(uverbs_class, uverbs_dev->cdev.dev);
-
-err_cdev:
- cdev_del(&uverbs_dev->cdev);
- clear_bit(devnum, dev_map);
-
+err_uapi:
+ ida_free(&uverbs_ida, devnum);
err:
if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
- kobject_put(&uverbs_dev->kobj);
+ put_device(&uverbs_dev->dev);
return;
}
@@ -1107,8 +1322,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
- file->is_closed = 1;
- list_del(&file->list);
+ list_del_init(&file->list);
kref_get(&file->ref);
/* We must release the mutex before going ahead and calling
@@ -1156,10 +1370,8 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
if (!uverbs_dev)
return;
- dev_set_drvdata(uverbs_dev->dev, NULL);
- device_destroy(uverbs_class, uverbs_dev->cdev.dev);
- cdev_del(&uverbs_dev->cdev);
- clear_bit(uverbs_dev->devnum, dev_map);
+ cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
+ ida_free(&uverbs_ida, uverbs_dev->devnum);
if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
@@ -1182,7 +1394,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
- kobject_put(&uverbs_dev->kobj);
+ put_device(&uverbs_dev->dev);
}
static char *uverbs_devnode(struct device *dev, umode_t *mode)
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index d8cfafe23bd9..cb9486ad5c67 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -326,11 +326,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
if (IS_ERR(action))
return PTR_ERR(action);
- atomic_set(&action->usecnt, 0);
- action->device = ib_dev;
- action->type = IB_FLOW_ACTION_ESP;
- action->uobject = uobj;
- uobj->object = action;
+ uverbs_flow_action_fill_action(action, uobj, ib_dev,
+ IB_FLOW_ACTION_ESP);
return 0;
}
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 73ea6f0db88f..86f3fc5e04b4 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -73,6 +73,18 @@ static int uapi_merge_method(struct uverbs_api *uapi,
if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
method_elm->driver_method |= is_driver;
+ /*
+ * Like other uobject based things we only support a single
+ * uobject being NEW'd or DESTROY'd
+ */
+ if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+ u8 access = attr->attr.u2.objs_arr.access;
+
+ if (WARN_ON(access == UVERBS_ACCESS_NEW ||
+ access == UVERBS_ACCESS_DESTROY))
+ return -EINVAL;
+ }
+
attr_slot =
uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
sizeof(*attr_slot));
@@ -248,6 +260,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi)
kfree(rcu_dereference_protected(*slot, true));
radix_tree_iter_delete(&uapi->radix, &iter, slot);
}
+ kfree(uapi);
}
struct uverbs_api *uverbs_alloc_api(
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 6ee03d6089eb..65a7e0b44ad7 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -264,7 +264,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
}
pd->res.type = RDMA_RESTRACK_PD;
- pd->res.kern_name = caller;
+ rdma_restrack_set_task(&pd->res, caller);
rdma_restrack_add(&pd->res);
if (mr_access_flags) {
@@ -710,7 +710,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
ah_attr->roce.dmac,
- sgid_attr->ndev, &hop_limit);
+ sgid_attr, &hop_limit);
grh->hop_limit = hop_limit;
return ret;
@@ -1509,8 +1509,7 @@ static const struct {
};
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll)
+ enum ib_qp_type type, enum ib_qp_attr_mask mask)
{
enum ib_qp_attr_mask req_param, opt_param;
@@ -1629,14 +1628,16 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
- pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
- __func__, qp->device->name);
+ dev_warn(&qp->device->dev,
+ "%s rq_psn overflow, masking to 24 bits\n",
+ __func__);
attr->rq_psn &= 0xffffff;
}
if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
- pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
- __func__, qp->device->name);
+ dev_warn(&qp->device->dev,
+ " %s sq_psn overflow, masking to 24 bits\n",
+ __func__);
attr->sq_psn &= 0xffffff;
}
}
@@ -1888,7 +1889,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ;
- cq->res.kern_name = caller;
+ rdma_restrack_set_task(&cq->res, caller);
rdma_restrack_add(&cq->res);
}
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 96f76896488d..31baa8939a4f 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -40,7 +40,6 @@
#ifndef __BNXT_RE_H__
#define __BNXT_RE_H__
#define ROCE_DRV_MODULE_NAME "bnxt_re"
-#define ROCE_DRV_MODULE_VERSION "1.0.0"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
#define BNXT_RE_PAGE_SHIFT_4K (12)
@@ -120,6 +119,8 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
+#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
+#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 77416bc61e6e..604b71875f5f 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -68,6 +68,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_TX_PKTS] = "tx_pkts",
[BNXT_RE_TX_BYTES] = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
+ [BNXT_RE_RX_DROPS] = "rx_roce_drops",
+ [BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
@@ -106,7 +108,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
- [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
+ [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
+ [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
};
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -128,6 +131,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
if (bnxt_re_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
+ stats->value[BNXT_RE_RX_DROPS] =
+ le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
+ stats->value[BNXT_RE_RX_DISCARDS] =
+ le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
stats->value[BNXT_RE_RX_PKTS] =
le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
stats->value[BNXT_RE_RX_BYTES] =
@@ -220,6 +227,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
rdev->stats.res_tx_pci_err;
stats->value[BNXT_RE_RES_RX_PCI_ERR] =
rdev->stats.res_rx_pci_err;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
+ rdev->stats.res_oos_drop_count;
}
return ARRAY_SIZE(bnxt_re_stat_name);
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index a01a922717d5..76399f477e5c 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -51,6 +51,8 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
+ BNXT_RE_RX_DROPS,
+ BNXT_RE_RX_DISCARDS,
BNXT_RE_TO_RETRANSMITS,
BNXT_RE_SEQ_ERR_NAKS_RCVD,
BNXT_RE_MAX_RETRY_EXCEEDED,
@@ -90,6 +92,7 @@ enum bnxt_re_hw_stats {
BNXT_RE_RES_SRQ_LOAD_ERR,
BNXT_RE_RES_TX_PCI_ERR,
BNXT_RE_RES_RX_PCI_ERR,
+ BNXT_RE_OUT_OF_SEQ_ERR,
BNXT_RE_NUM_COUNTERS
};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index bbfb86eb2d24..54fdd4cf5288 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -833,6 +833,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
"Failed to destroy Shadow QP");
return rc;
}
+ bnxt_qplib_free_qp_res(&rdev->qplib_res,
+ &rdev->qp1_sqp->qplib_qp);
mutex_lock(&rdev->qp_lock);
list_del(&rdev->qp1_sqp->list);
atomic_dec(&rdev->qp_count);
@@ -1596,8 +1598,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
new_qp_state = qp_attr->qp_state;
if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
- ib_qp->qp_type, qp_attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ ib_qp->qp_type, qp_attr_mask)) {
dev_err(rdev_to_dev(rdev),
"Invalid attribute mask: %#x specified ",
qp_attr_mask);
@@ -2662,6 +2663,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
nq->budget++;
atomic_inc(&rdev->cq_count);
+ spin_lock_init(&cq->cq_lock);
if (context) {
struct bnxt_re_cq_resp resp;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 20b9f31052bf..cf2282654210 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -67,7 +67,7 @@
#include "hw_counters.h"
static char version[] =
- BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
+ BNXT_RE_DESC "\n";
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
@@ -78,7 +78,7 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
/* Mutex to protect the list of bnxt_re devices added */
static DEFINE_MUTEX(bnxt_re_dev_lock);
static struct workqueue_struct *bnxt_re_wq;
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait);
+static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev);
/* SR-IOV helper functions */
@@ -182,7 +182,7 @@ static void bnxt_re_shutdown(void *p)
if (!rdev)
return;
- bnxt_re_ib_unreg(rdev, false);
+ bnxt_re_ib_unreg(rdev);
}
static void bnxt_re_stop_irq(void *handle)
@@ -251,7 +251,7 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
/* Driver registration routines used to let the networking driver (bnxt_en)
* to know that the RoCE driver is now installed
*/
-static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
+static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev;
int rc;
@@ -260,14 +260,9 @@ static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
return -EINVAL;
en_dev = rdev->en_dev;
- /* Acquire rtnl lock if it is not invokded from netdev event */
- if (lock_wait)
- rtnl_lock();
rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
BNXT_ROCE_ULP);
- if (lock_wait)
- rtnl_unlock();
return rc;
}
@@ -281,14 +276,12 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
en_dev = rdev->en_dev;
- rtnl_lock();
rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
&bnxt_re_ulp_ops, rdev);
- rtnl_unlock();
return rc;
}
-static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
+static int bnxt_re_free_msix(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev;
int rc;
@@ -298,13 +291,9 @@ static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
en_dev = rdev->en_dev;
- if (lock_wait)
- rtnl_lock();
rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
- if (lock_wait)
- rtnl_unlock();
return rc;
}
@@ -320,7 +309,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus());
- rtnl_lock();
num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
rdev->msix_entries,
num_msix_want);
@@ -335,7 +323,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
}
rdev->num_msix = num_msix_got;
done:
- rtnl_unlock();
return rc;
}
@@ -358,24 +345,18 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
fw_msg->timeout = timeout;
}
-static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
- bool lock_wait)
+static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_ring_free_input req = {0};
struct hwrm_ring_free_output resp;
struct bnxt_fw_msg fw_msg;
- bool do_unlock = false;
int rc = -EINVAL;
if (!en_dev)
return rc;
memset(&fw_msg, 0, sizeof(fw_msg));
- if (lock_wait) {
- rtnl_lock();
- do_unlock = true;
- }
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
@@ -386,8 +367,6 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
if (rc)
dev_err(rdev_to_dev(rdev),
"Failed to free HW ring:%d :%#x", req.ring_id, rc);
- if (do_unlock)
- rtnl_unlock();
return rc;
}
@@ -405,7 +384,6 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
return rc;
memset(&fw_msg, 0, sizeof(fw_msg));
- rtnl_lock();
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
req.enables = 0;
req.page_tbl_addr = cpu_to_le64(dma_arr[0]);
@@ -426,27 +404,21 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
if (!rc)
*fw_ring_id = le16_to_cpu(resp.ring_id);
- rtnl_unlock();
return rc;
}
static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
- u32 fw_stats_ctx_id, bool lock_wait)
+ u32 fw_stats_ctx_id)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_stat_ctx_free_input req = {0};
struct bnxt_fw_msg fw_msg;
- bool do_unlock = false;
int rc = -EINVAL;
if (!en_dev)
return rc;
memset(&fw_msg, 0, sizeof(fw_msg));
- if (lock_wait) {
- rtnl_lock();
- do_unlock = true;
- }
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
@@ -457,8 +429,6 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
dev_err(rdev_to_dev(rdev),
"Failed to free HW stats context %#x", rc);
- if (do_unlock)
- rtnl_unlock();
return rc;
}
@@ -478,7 +448,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
return rc;
memset(&fw_msg, 0, sizeof(fw_msg));
- rtnl_lock();
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
req.update_period_ms = cpu_to_le32(1000);
@@ -490,7 +459,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
if (!rc)
*fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
- rtnl_unlock();
return rc;
}
@@ -567,6 +535,34 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
return en_dev;
}
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *bnxt_re_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group bnxt_re_dev_attr_group = {
+ .attrs = bnxt_re_attributes,
+};
+
static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
{
ib_unregister_device(&rdev->ibdev);
@@ -579,7 +575,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
/* ib device init */
ibdev->owner = THIS_MODULE;
ibdev->node_type = RDMA_NODE_IB_CA;
- strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
strlen(BNXT_RE_DESC) + 5);
ibdev->phys_port_cnt = 1;
@@ -671,34 +666,11 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
+ rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
- return ib_register_device(ibdev, NULL);
+ return ib_register_device(ibdev, "bnxt_re%d", NULL);
}
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
-}
-
-static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
-static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
-
-static struct device_attribute *bnxt_re_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
{
dev_put(rdev->netdev);
@@ -896,10 +868,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
- if (rdev->nq[0].hwq.max_elements) {
- for (i = 1; i < rdev->num_msix; i++)
- bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
- }
+ for (i = 1; i < rdev->num_msix; i++)
+ bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
if (rdev->qplib_res.rcfw)
bnxt_qplib_cleanup_res(&rdev->qplib_res);
@@ -908,6 +878,7 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
+ int num_vec_enabled = 0;
bnxt_qplib_init_res(&rdev->qplib_res);
@@ -923,25 +894,29 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
"Failed to enable NQ with rc = 0x%x", rc);
goto fail;
}
+ num_vec_enabled++;
}
return 0;
fail:
+ for (i = num_vec_enabled; i >= 0; i--)
+ bnxt_qplib_disable_nq(&rdev->nq[i]);
+
return rc;
}
-static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev, bool lock_wait)
+static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
{
int i;
for (i = 0; i < rdev->num_msix - 1; i++) {
- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, lock_wait);
+ bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
bnxt_qplib_free_nq(&rdev->nq[i]);
}
}
-static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
+static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
{
- bnxt_re_free_nq_res(rdev, lock_wait);
+ bnxt_re_free_nq_res(rdev);
if (rdev->qplib_res.dpi_tbl.max) {
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
@@ -957,6 +932,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
+ int num_vec_created = 0;
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
@@ -983,7 +959,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
if (rc) {
dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
i, rc);
- goto dealloc_dpi;
+ goto free_nq;
}
rc = bnxt_re_net_ring_alloc
(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
@@ -996,14 +972,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
dev_err(rdev_to_dev(rdev),
"Failed to allocate NQ fw id with rc = 0x%x",
rc);
+ bnxt_qplib_free_nq(&rdev->nq[i]);
goto free_nq;
}
+ num_vec_created++;
}
return 0;
free_nq:
- for (i = 0; i < rdev->num_msix - 1; i++)
+ for (i = num_vec_created; i >= 0; i--) {
+ bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
bnxt_qplib_free_nq(&rdev->nq[i]);
-dealloc_dpi:
+ }
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged);
@@ -1021,12 +1000,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
struct ib_event ib_event;
ib_event.device = ibdev;
- if (qp)
+ if (qp) {
ib_event.element.qp = qp;
- else
+ ib_event.event = event;
+ if (qp->event_handler)
+ qp->event_handler(&ib_event, qp->qp_context);
+
+ } else {
ib_event.element.port_num = port_num;
- ib_event.event = event;
- ib_dispatch_event(&ib_event);
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+ }
}
#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
@@ -1219,43 +1203,42 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
return 0;
}
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
+static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{
- int i, rc;
+ int rc;
if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
- for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
- device_remove_file(&rdev->ibdev.dev,
- bnxt_re_attributes[i]);
/* Cleanup ib dev */
bnxt_re_unregister_ib(rdev);
}
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
- cancel_delayed_work(&rdev->worker);
+ cancel_delayed_work_sync(&rdev->worker);
- bnxt_re_cleanup_res(rdev);
- bnxt_re_free_res(rdev, lock_wait);
+ if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
+ &rdev->flags))
+ bnxt_re_cleanup_res(rdev);
+ if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
+ bnxt_re_free_res(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
if (rc)
dev_warn(rdev_to_dev(rdev),
"Failed to deinitialize RCFW: %#x", rc);
- bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id,
- lock_wait);
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
- bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
}
if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
- rc = bnxt_re_free_msix(rdev, lock_wait);
+ rc = bnxt_re_free_msix(rdev);
if (rc)
dev_warn(rdev_to_dev(rdev),
"Failed to free MSI-X vectors: %#x", rc);
}
if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
- rc = bnxt_re_unregister_netdev(rdev, lock_wait);
+ rc = bnxt_re_unregister_netdev(rdev);
if (rc)
dev_warn(rdev_to_dev(rdev),
"Failed to unregister with netdev: %#x", rc);
@@ -1274,7 +1257,13 @@ static void bnxt_re_worker(struct work_struct *work)
static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
{
- int i, j, rc;
+ int rc;
+
+ bool locked;
+
+ /* Acquire rtnl lock through out this function */
+ rtnl_lock();
+ locked = true;
/* Registered a new RoCE device instance to netdev */
rc = bnxt_re_register_netdev(rdev);
@@ -1358,12 +1347,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
pr_err("Failed to allocate resources: %#x\n", rc);
goto fail;
}
+ set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
rc = bnxt_re_init_res(rdev);
if (rc) {
pr_err("Failed to initialize resources: %#x\n", rc);
goto fail;
}
+ set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
+
if (!rdev->is_virtfn) {
rc = bnxt_re_setup_qos(rdev);
if (rc)
@@ -1374,28 +1366,17 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
}
+ rtnl_unlock();
+ locked = false;
+
/* Register ib dev */
rc = bnxt_re_register_ib(rdev);
if (rc) {
pr_err("Failed to register with IB: %#x\n", rc);
goto fail;
}
- dev_info(rdev_to_dev(rdev), "Device registered successfully");
- for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
- rc = device_create_file(&rdev->ibdev.dev,
- bnxt_re_attributes[i]);
- if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to create IB sysfs: %#x", rc);
- /* Must clean up all created device files */
- for (j = 0; j < i; j++)
- device_remove_file(&rdev->ibdev.dev,
- bnxt_re_attributes[j]);
- bnxt_re_unregister_ib(rdev);
- goto fail;
- }
- }
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+ dev_info(rdev_to_dev(rdev), "Device registered successfully");
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
@@ -1404,17 +1385,21 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
return 0;
free_sctx:
- bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true);
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
free_ctx:
bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
disable_rcfw:
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
free_ring:
- bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
free_rcfw:
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
fail:
- bnxt_re_ib_unreg(rdev, true);
+ if (!locked)
+ rtnl_lock();
+ bnxt_re_ib_unreg(rdev);
+ rtnl_unlock();
+
return rc;
}
@@ -1567,7 +1552,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
*/
if (atomic_read(&rdev->sched_count) > 0)
goto exit;
- bnxt_re_ib_unreg(rdev, false);
+ bnxt_re_ib_unreg(rdev);
bnxt_re_remove_one(rdev);
bnxt_re_dev_unreg(rdev);
break;
@@ -1646,7 +1631,10 @@ static void __exit bnxt_re_mod_exit(void)
*/
flush_workqueue(bnxt_re_wq);
bnxt_re_dev_stop(rdev);
- bnxt_re_ib_unreg(rdev, true);
+ /* Acquire the rtnl_lock as the L2 resources are freed here */
+ rtnl_lock();
+ bnxt_re_ib_unreg(rdev);
+ rtnl_unlock();
bnxt_re_remove_one(rdev);
bnxt_re_dev_unreg(rdev);
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index e426b990c1dd..b98b054148cd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -36,6 +36,8 @@
* Description: Fast Path Operators
*/
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->sq.flushed) {
dev_dbg(&scq->hwq.pdev->dev,
- "QPLIB: FP: Adding to SQ Flush list = %p",
- qp);
+ "FP: Adding to SQ Flush list = %p\n", qp);
bnxt_qplib_cancel_phantom_processing(qp);
list_add_tail(&qp->sq_flush, &scq->sqf_head);
qp->sq.flushed = true;
@@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->srq) {
if (!qp->rq.flushed) {
dev_dbg(&rcq->hwq.pdev->dev,
- "QPLIB: FP: Adding to RQ Flush list = %p",
- qp);
+ "FP: Adding to RQ Flush list = %p\n", qp);
list_add_tail(&qp->rq_flush, &rcq->rqf_head);
qp->rq.flushed = true;
}
@@ -196,7 +196,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *rq = &qp->rq;
- struct bnxt_qplib_q *sq = &qp->rq;
+ struct bnxt_qplib_q *sq = &qp->sq;
int rc = 0;
if (qp->sq_hdr_buf_size && sq->hwq.max_elements) {
@@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->sq_hdr_buf) {
rc = -ENOMEM;
dev_err(&res->pdev->dev,
- "QPLIB: Failed to create sq_hdr_buf");
+ "Failed to create sq_hdr_buf\n");
goto fail;
}
}
@@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->rq_hdr_buf) {
rc = -ENOMEM;
dev_err(&res->pdev->dev,
- "QPLIB: Failed to create rq_hdr_buf");
+ "Failed to create rq_hdr_buf\n");
goto fail;
}
}
@@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_cqne_processed++;
else
dev_warn(&nq->pdev->dev,
- "QPLIB: cqn - type 0x%x not handled",
- type);
+ "cqn - type 0x%x not handled\n", type);
spin_unlock_bh(&cq->compl_lock);
break;
}
@@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_srqne_processed++;
else
dev_warn(&nq->pdev->dev,
- "QPLIB: SRQ event 0x%x not handled",
+ "SRQ event 0x%x not handled\n",
nqsrqe->event);
break;
}
@@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
break;
default:
dev_warn(&nq->pdev->dev,
- "QPLIB: nqe with type = 0x%x not handled",
- type);
+ "nqe with type = 0x%x not handled\n", type);
break;
}
raw_cons++;
@@ -360,7 +358,8 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
}
/* Make sure the HW is stopped! */
- bnxt_qplib_nq_stop_irq(nq, true);
+ if (nq->requested)
+ bnxt_qplib_nq_stop_irq(nq, true);
if (nq->bar_reg_iomem)
iounmap(nq->bar_reg_iomem);
@@ -396,7 +395,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
rc = irq_set_affinity_hint(nq->vector, &nq->mask);
if (rc) {
dev_warn(&nq->pdev->dev,
- "QPLIB: set affinity failed; vector: %d nq_idx: %d\n",
+ "set affinity failed; vector: %d nq_idx: %d\n",
nq->vector, nq_indx);
}
nq->requested = true;
@@ -443,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
if (rc) {
dev_err(&nq->pdev->dev,
- "QPLIB: Failed to request irq for nq-idx %d", nq_idx);
+ "Failed to request irq for nq-idx %d\n", nq_idx);
goto fail;
}
@@ -662,8 +661,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
spin_lock(&srq_hwq->lock);
if (srq->start_idx == srq->last_idx) {
- dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!",
- srq->id);
+ dev_err(&srq_hwq->pdev->dev,
+ "FP: SRQ (0x%x) is full!\n", srq->id);
rc = -EINVAL;
spin_unlock(&srq_hwq->lock);
goto done;
@@ -1324,7 +1323,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
}
}
if (i == res->sgid_tbl.max)
- dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
+ dev_warn(&res->pdev->dev, "SGID not found??\n");
qp->ah.hop_limit = sb->hop_limit;
qp->ah.traffic_class = sb->traffic_class;
@@ -1536,7 +1535,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
if (bnxt_qplib_queue_full(sq)) {
dev_err(&sq->hwq.pdev->dev,
- "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+ "prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
sq->q_full_delta);
rc = -ENOMEM;
@@ -1561,7 +1560,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
/* Copy the inline data */
if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
dev_warn(&sq->hwq.pdev->dev,
- "QPLIB: Inline data length > 96 detected");
+ "Inline data length > 96 detected\n");
data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
} else {
data_len = wqe->inline_len;
@@ -1776,7 +1775,7 @@ done:
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&sq->hwq.pdev->dev,
- "QPLIB: FP: Failed to allocate SQ nq_work!");
+ "FP: Failed to allocate SQ nq_work!\n");
rc = -ENOMEM;
}
}
@@ -1815,13 +1814,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
sch_handler = true;
dev_dbg(&rq->hwq.pdev->dev,
- "%s Error QP. Scheduling for poll_cq\n",
- __func__);
+ "%s: Error QP. Scheduling for poll_cq\n", __func__);
goto queue_err;
}
if (bnxt_qplib_queue_full(rq)) {
dev_err(&rq->hwq.pdev->dev,
- "QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
+ "FP: QP (0x%x) RQ is full!\n", qp->id);
rc = -EINVAL;
goto done;
}
@@ -1870,7 +1868,7 @@ queue_err:
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&rq->hwq.pdev->dev,
- "QPLIB: FP: Failed to allocate RQ nq_work!");
+ "FP: Failed to allocate RQ nq_work!\n");
rc = -ENOMEM;
}
}
@@ -1932,7 +1930,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
if (!cq->dpi) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: FP: CREATE_CQ failed due to NULL DPI");
+ "FP: CREATE_CQ failed due to NULL DPI\n");
return -EINVAL;
}
req.dpi = cpu_to_le32(cq->dpi->dpi);
@@ -1969,6 +1967,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
INIT_LIST_HEAD(&cq->sqf_head);
INIT_LIST_HEAD(&cq->rqf_head);
spin_lock_init(&cq->compl_lock);
+ spin_lock_init(&cq->flush_lock);
bnxt_qplib_arm_cq_enable(cq);
return 0;
@@ -2172,7 +2171,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
* comes back
*/
dev_dbg(&cq->hwq.pdev->dev,
- "FP:Got Phantom CQE");
+ "FP: Got Phantom CQE\n");
sq->condition = false;
sq->single = true;
rc = 0;
@@ -2189,7 +2188,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
peek_raw_cq_cons++;
}
dev_err(&cq->hwq.pdev->dev,
- "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+ "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
rc = -EINVAL;
}
@@ -2213,7 +2212,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: Process Req qp is NULL");
+ "FP: Process Req qp is NULL\n");
return -EINVAL;
}
sq = &qp->sq;
@@ -2221,16 +2220,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
if (cqe_sq_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process req reported ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
cqe_sq_cons, sq->hwq.max_elements);
return -EINVAL;
}
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
/* Require to walk the sq's swq to fabricate CQEs for all previously
@@ -2262,9 +2259,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Processed Req ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
+ "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
sw_sq_cons, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
@@ -2330,12 +2325,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
- dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
+ dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
@@ -2356,9 +2351,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
return -EINVAL;
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process RC ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@@ -2371,9 +2364,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process RC ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@@ -2409,12 +2400,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
- dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
+ dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
@@ -2439,9 +2430,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process UD ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@@ -2454,9 +2443,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process UD ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@@ -2508,13 +2495,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: process_cq Raw/QP1 qp is NULL");
+ dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
@@ -2543,14 +2529,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
srq = qp->srq;
if (!srq) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: SRQ used but not defined??");
+ "FP: SRQ used but not defined??\n");
return -EINVAL;
}
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Raw/QP1 ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@@ -2563,9 +2547,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: ix 0x%x exceeded RQ max 0x%x",
+ "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@@ -2600,14 +2582,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
/* Check the Status */
if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
dev_warn(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Terminal Error status = 0x%x",
+ "FP: CQ Process Terminal Error status = 0x%x\n",
hwcqe->status);
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process terminal qp is NULL");
+ "FP: CQ Process terminal qp is NULL\n");
return -EINVAL;
}
@@ -2623,16 +2605,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
if (cqe_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process terminal reported ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
cqe_cons, sq->hwq.max_elements);
goto do_rq;
}
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto sq_done;
}
@@ -2673,16 +2653,14 @@ do_rq:
goto done;
} else if (cqe_cons > rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Processed terminal ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
+ "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
cqe_cons, rq->hwq.max_elements);
goto done;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
rc = 0;
goto done;
}
@@ -2704,7 +2682,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
/* Check the Status */
if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Cutoff Error status = 0x%x",
+ "FP: CQ Process Cutoff Error status = 0x%x\n",
hwcqe->status);
return -EINVAL;
}
@@ -2724,16 +2702,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
spin_lock_irqsave(&cq->flush_lock, flags);
list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
- dev_dbg(&cq->hwq.pdev->dev,
- "QPLIB: FP: Flushing SQ QP= %p",
- qp);
+ dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
__flush_sq(&qp->sq, qp, &cqe, &budget);
}
list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
- dev_dbg(&cq->hwq.pdev->dev,
- "QPLIB: FP: Flushing RQ QP= %p",
- qp);
+ dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
__flush_rq(&qp->rq, qp, &cqe, &budget);
}
spin_unlock_irqrestore(&cq->flush_lock, flags);
@@ -2801,7 +2775,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
goto exit;
default:
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: process_cq unknown type 0x%lx",
+ "process_cq unknown type 0x%lx\n",
hw_cqe->cqe_type_toggle &
CQ_BASE_CQE_TYPE_MASK);
rc = -EINVAL;
@@ -2814,7 +2788,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
* next one
*/
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: process_cqe error rc = 0x%x", rc);
+ "process_cqe error rc = 0x%x\n", rc);
}
raw_cons++;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 2852d350ada1..be4e33e9f962 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -35,6 +35,9 @@
*
* Description: RDMA Controller HW interface
*/
+
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
@@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW not initialized, reject opcode 0x%x",
- opcode);
+ "RCFW not initialized, reject opcode 0x%x\n", opcode);
return -EINVAL;
}
if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
- dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+ dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
return -EINVAL;
}
@@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
*/
spin_lock_irqsave(&cmdq->lock, flags);
if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
+ dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
spin_unlock_irqrestore(&cmdq->lock, flags);
return -EAGAIN;
}
@@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
if (!cmdqe) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW request failed with no cmdqe!");
+ "RCFW request failed with no cmdqe!\n");
goto done;
}
/* Copy a segment of the req cmd to the cmdq */
@@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
/* send failed */
- dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
cookie, opcode);
return rc;
}
@@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
rc = __wait_for_resp(rcfw, cookie);
if (rc) {
/* timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
return rc;
@@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (evnt->status) {
/* failed with status */
- dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
cookie, opcode, evnt->status);
rc = -EFAULT;
}
@@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
qp_id = le32_to_cpu(err_event->xid);
qp = rcfw->qp_tbl[qp_id].qp_handle;
dev_dbg(&rcfw->pdev->dev,
- "QPLIB: Received QP error notification");
+ "Received QP error notification\n");
dev_dbg(&rcfw->pdev->dev,
- "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
+ "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
qp_id, err_event->req_err_state_reason,
err_event->res_err_state_reason);
if (!qp)
@@ -309,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
rcfw->aeq_handler(rcfw, qp_event, qp);
break;
default:
- /* Command Response */
- spin_lock_irqsave(&cmdq->lock, flags);
+ /*
+ * Command Response
+ * cmdq->lock needs to be acquired to synchronie
+ * the command send and completion reaping. This function
+ * is always called with creq->lock held. Using
+ * the nested variant of spin_lock.
+ *
+ */
+
+ spin_lock_irqsave_nested(&cmdq->lock, flags,
+ SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
@@ -322,14 +333,16 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
crsqe->resp = NULL;
} else {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
- crsqe->resp ? "mismatch" : "collision",
- crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+ if (crsqe->resp && crsqe->resp->cookie)
+ dev_err(&rcfw->pdev->dev,
+ "CMD %s cookie sent=%#x, recd=%#x\n",
+ crsqe->resp ? "mismatch" : "collision",
+ crsqe->resp ? crsqe->resp->cookie : 0,
+ mcookie);
}
if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
dev_warn(&rcfw->pdev->dev,
- "QPLIB: CMD bit %d was not requested", cbit);
+ "CMD bit %d was not requested\n", cbit);
cmdq->cons += crsqe->req_size;
crsqe->req_size = 0;
@@ -376,14 +389,14 @@ static void bnxt_qplib_service_creq(unsigned long data)
(rcfw, (struct creq_func_event *)creqe))
rcfw->creq_func_event_processed++;
else
- dev_warn
- (&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
- type);
+ dev_warn(&rcfw->pdev->dev,
+ "aeqe:%#x Not handled\n", type);
break;
default:
- dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
- dev_warn(&rcfw->pdev->dev,
- "QPLIB: op_event = 0x%x not handled", type);
+ if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
+ dev_warn(&rcfw->pdev->dev,
+ "creqe with event 0x%x not handled\n",
+ type);
break;
}
raw_cons++;
@@ -551,7 +564,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_L2_CMPL)) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: HW channel CREQ allocation failed");
+ "HW channel CREQ allocation failed\n");
goto fail;
}
rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
@@ -560,7 +573,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_CTX)) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: HW channel CMDQ allocation failed");
+ "HW channel CMDQ allocation failed\n");
goto fail;
}
@@ -605,21 +618,18 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
bnxt_qplib_rcfw_stop_irq(rcfw, true);
- if (rcfw->cmdq_bar_reg_iomem)
- iounmap(rcfw->cmdq_bar_reg_iomem);
- rcfw->cmdq_bar_reg_iomem = NULL;
-
- if (rcfw->creq_bar_reg_iomem)
- iounmap(rcfw->creq_bar_reg_iomem);
- rcfw->creq_bar_reg_iomem = NULL;
+ iounmap(rcfw->cmdq_bar_reg_iomem);
+ iounmap(rcfw->creq_bar_reg_iomem);
indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
if (indx != rcfw->bmap_size)
dev_err(&rcfw->pdev->dev,
- "QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
+ "disabling RCFW with pending cmd-bit %lx\n", indx);
kfree(rcfw->cmdq_bitmap);
rcfw->bmap_size = 0;
+ rcfw->cmdq_bar_reg_iomem = NULL;
+ rcfw->creq_bar_reg_iomem = NULL;
rcfw->aeq_handler = NULL;
rcfw->vector = 0;
}
@@ -681,8 +691,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
RCFW_COMM_BASE_OFFSET,
RCFW_COMM_SIZE);
if (!rcfw->cmdq_bar_reg_iomem) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: CMDQ BAR region %d mapping failed",
+ dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
rcfw->cmdq_bar_reg);
return -ENOMEM;
}
@@ -697,14 +706,15 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
if (!res_base)
dev_err(&rcfw->pdev->dev,
- "QPLIB: CREQ BAR region %d resc start is 0!",
+ "CREQ BAR region %d resc start is 0!\n",
rcfw->creq_bar_reg);
rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
4);
if (!rcfw->creq_bar_reg_iomem) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: CREQ BAR region %d mapping failed",
+ dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
rcfw->creq_bar_reg);
+ iounmap(rcfw->cmdq_bar_reg_iomem);
+ rcfw->cmdq_bar_reg_iomem = NULL;
return -ENOMEM;
}
rcfw->creq_qp_event_processed = 0;
@@ -717,7 +727,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
if (rc) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
+ "Failed to request IRQ for CREQ rc = 0x%x\n", rc);
bnxt_qplib_disable_rcfw_channel(rcfw);
return rc;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 46416dfe8830..9a8687dc0a79 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -154,6 +154,8 @@ struct bnxt_qplib_qp_node {
void *qp_handle; /* ptr to qplib_qp */
};
+#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
+
/* RCFW Communication Channels */
struct bnxt_qplib_rcfw {
struct pci_dev *pdev;
@@ -190,6 +192,8 @@ struct bnxt_qplib_rcfw {
struct bnxt_qplib_crsq *crsqe_tbl;
int qp_tbl_size;
struct bnxt_qplib_qp_node *qp_tbl;
+ u64 oos_prev;
+ u32 init_oos_stats;
};
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 539a5d44e6db..59eeac55626f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -36,6 +36,8 @@
* Description: QPLib resource manager
*/
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
@@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
pbl->pg_map_arr[i]);
else
dev_warn(&pdev->dev,
- "QPLIB: PBL free pg_arr[%d] empty?!",
- i);
+ "PBL free pg_arr[%d] empty?!\n", i);
pbl->pg_arr[i] = NULL;
}
}
@@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
struct bnxt_qplib_pkey_tbl *pkey_tbl)
{
if (!pkey_tbl->tbl)
- dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
+ dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
else
kfree(pkey_tbl->tbl);
@@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd *pd)
{
if (test_and_set_bit(pd->id, pdt->tbl)) {
- dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
+ dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
pd->id);
return -EINVAL;
}
@@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
struct bnxt_qplib_dpi *dpi)
{
if (dpi->dpi >= dpit->max) {
- dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
+ dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
return -EINVAL;
}
if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
- dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
+ dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
dpi->dpi);
return -EINVAL;
}
@@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
u32 dbr_len, bytes;
if (dpit->dbr_bar_reg_iomem) {
- dev_err(&res->pdev->dev,
- "QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
+ dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
+ dbr_bar_reg);
return -EALREADY;
}
bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
if (!bar_reg_base) {
- dev_err(&res->pdev->dev,
- "QPLIB: BAR region %d resc start failed", dbr_bar_reg);
+ dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
+ dbr_bar_reg);
return -ENOMEM;
}
dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
- dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
- dbr_len);
+ dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
return -ENOMEM;
}
@@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
dbr_len);
if (!dpit->dbr_bar_reg_iomem) {
dev_err(&res->pdev->dev,
- "QPLIB: FP: DBR BAR region %d mapping failed",
- dbr_bar_reg);
+ "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
return -ENOMEM;
}
@@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma) {
- dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
+ dev_err(&pdev->dev, "Stats DMA allocation failed\n");
return -ENOMEM;
}
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 4097f3fa25c5..5216b5f844cc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -36,6 +36,8 @@
* Description: Slow Path Operators
*/
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+ "SP: QUERY_FUNC alloc side buffer failed\n");
return -ENOMEM;
}
@@ -135,8 +137,16 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_srq = le16_to_cpu(sb->max_srq);
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge;
- /* Bono only reports 1 PKEY for now, but it can support > 1 */
attr->max_pkey = le32_to_cpu(sb->max_pkeys);
+ /*
+ * Some versions of FW reports more than 0xFFFF.
+ * Restrict it for now to 0xFFFF to avoid
+ * reporting trucated value
+ */
+ if (attr->max_pkey > 0xFFFF) {
+ /* ib_port_attr::pkey_tbl_len is u16 */
+ attr->max_pkey = 0xFFFF;
+ }
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
attr->l2_db_size = (sb->l2_db_space_size + 1) *
@@ -186,8 +196,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
(void *)&resp,
NULL, 0);
if (rc) {
- dev_err(&res->pdev->dev,
- "QPLIB: Failed to set function resources");
+ dev_err(&res->pdev->dev, "Failed to set function resources\n");
}
return rc;
}
@@ -199,7 +208,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
{
if (index >= sgid_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: Index %d exceeded SGID table max (%d)",
+ "Index %d exceeded SGID table max (%d)\n",
index, sgid_tbl->max);
return -EINVAL;
}
@@ -217,13 +226,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int index;
if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL;
}
/* Do we need a sgid_lock here? */
if (!sgid_tbl->active) {
- dev_err(&res->pdev->dev,
- "QPLIB: SGID table has no active entries");
+ dev_err(&res->pdev->dev, "SGID table has no active entries\n");
return -ENOMEM;
}
for (index = 0; index < sgid_tbl->max; index++) {
@@ -231,7 +239,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
break;
}
if (index == sgid_tbl->max) {
- dev_warn(&res->pdev->dev, "GID not found in the SGID table");
+ dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
return 0;
}
/* Remove GID from the SGID table */
@@ -244,7 +252,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
if (sgid_tbl->hw_id[index] == 0xFFFF) {
dev_err(&res->pdev->dev,
- "QPLIB: GID entry contains an invalid HW id");
+ "GID entry contains an invalid HW id\n");
return -EINVAL;
}
req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
@@ -258,7 +266,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[index] = 0;
sgid_tbl->active--;
dev_dbg(&res->pdev->dev,
- "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
+ "SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
index, sgid_tbl->hw_id[index], sgid_tbl->active);
sgid_tbl->hw_id[index] = (u16)-1;
@@ -277,20 +285,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int i, free_idx;
if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL;
}
/* Do we need a sgid_lock here? */
if (sgid_tbl->active == sgid_tbl->max) {
- dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
+ dev_err(&res->pdev->dev, "SGID table is full\n");
return -ENOMEM;
}
free_idx = sgid_tbl->max;
for (i = 0; i < sgid_tbl->max; i++) {
if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
dev_dbg(&res->pdev->dev,
- "QPLIB: SGID entry already exist in entry %d!",
- i);
+ "SGID entry already exist in entry %d!\n", i);
*index = i;
return -EALREADY;
} else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
@@ -301,7 +308,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
if (free_idx == sgid_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: SGID table is FULL but count is not MAX??");
+ "SGID table is FULL but count is not MAX??\n");
return -ENOMEM;
}
if (update) {
@@ -348,7 +355,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[free_idx] = 1;
dev_dbg(&res->pdev->dev,
- "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
+ "SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
*index = free_idx;
@@ -404,7 +411,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
}
if (index >= pkey_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: Index %d exceeded PKEY table max (%d)",
+ "Index %d exceeded PKEY table max (%d)\n",
index, pkey_tbl->max);
return -EINVAL;
}
@@ -419,14 +426,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
int i, rc = 0;
if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL;
}
/* Do we need a pkey_lock here? */
if (!pkey_tbl->active) {
- dev_err(&res->pdev->dev,
- "QPLIB: PKEY table has no active entries");
+ dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
return -ENOMEM;
}
for (i = 0; i < pkey_tbl->max; i++) {
@@ -435,8 +441,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
}
if (i == pkey_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: PKEY 0x%04x not found in the pkey table",
- *pkey);
+ "PKEY 0x%04x not found in the pkey table\n", *pkey);
return -ENOMEM;
}
memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
@@ -453,13 +458,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
int i, free_idx, rc = 0;
if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL;
}
/* Do we need a pkey_lock here? */
if (pkey_tbl->active == pkey_tbl->max) {
- dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
+ dev_err(&res->pdev->dev, "PKEY table is full\n");
return -ENOMEM;
}
free_idx = pkey_tbl->max;
@@ -471,7 +476,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
}
if (free_idx == pkey_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: PKEY table is FULL but count is not MAX??");
+ "PKEY table is FULL but count is not MAX??\n");
return -ENOMEM;
}
/* Add PKEY to the pkey_tbl */
@@ -555,8 +560,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
int rc;
if (mrw->lkey == 0xFFFFFFFF) {
- dev_info(&res->pdev->dev,
- "QPLIB: SP: Free a reserved lkey MRW");
+ dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
return 0;
}
@@ -666,9 +670,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
pages++;
if (pages > MAX_PBL_LVL_1_PGS) {
- dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
dev_err(&res->pdev->dev,
- "requested (0x%x) exceeded max (0x%x)",
+ "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
pages, MAX_PBL_LVL_1_PGS);
return -ENOMEM;
}
@@ -684,7 +687,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
HWQ_TYPE_CTX);
if (rc) {
dev_err(&res->pdev->dev,
- "SP: Reg MR memory allocation failed");
+ "SP: Reg MR memory allocation failed\n");
return -ENOMEM;
}
/* Write to the hwq */
@@ -795,7 +798,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
+ "SP: QUERY_ROCE_STATS alloc side buffer failed\n");
return -ENOMEM;
}
@@ -845,6 +848,16 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
+ if (!rcfw->init_oos_stats) {
+ rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+ rcfw->init_oos_stats = 1;
+ } else {
+ stats->res_oos_drop_count +=
+ (le64_to_cpu(sb->res_oos_drop_count) -
+ rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
+ rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+ }
+
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 9d3e8b994945..8079d7f5a008 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -205,6 +205,16 @@ struct bnxt_qplib_roce_stats {
/* res_tx_pci_err is 64 b */
u64 res_rx_pci_err;
/* res_rx_pci_err is 64 b */
+ u64 res_oos_drop_count;
+ /* res_oos_drop_count */
+ u64 active_qp_count_p0;
+ /* port 0 active qps */
+ u64 active_qp_count_p1;
+ /* port 1 active qps */
+ u64 active_qp_count_p2;
+ /* port 2 active qps */
+ u64 active_qp_count_p3;
+ /* port 3 active qps */
};
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 3e5a4f760d0e..8a9ead419ac2 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2929,6 +2929,11 @@ struct creq_query_roce_stats_resp_sb {
__le64 res_srq_load_err;
__le64 res_tx_pci_err;
__le64 res_rx_pci_err;
+ __le64 res_oos_drop_count;
+ __le64 active_qp_count_p0;
+ __le64 active_qp_count_p1;
+ __le64 active_qp_count_p2;
+ __le64 active_qp_count_p3;
};
/* QP error notification event (16 bytes) */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 1b9ff21aa1d5..ebbec02cebe0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1127,17 +1127,18 @@ static int iwch_query_port(struct ib_device *ibdev,
return 0;
}
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
@@ -1148,9 +1149,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
@@ -1158,6 +1160,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
}
+static DEVICE_ATTR_RO(board_id);
enum counters {
IPINRECEIVES,
@@ -1274,14 +1277,15 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return stats->num_counters;
}
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *iwch_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *iwch_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
+static const struct attribute_group iwch_attr_group = {
+ .attrs = iwch_class_attributes,
};
static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -1316,10 +1320,8 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
- int i;
pr_debug("%s iwch_dev %p\n", __func__, dev);
- strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
@@ -1402,33 +1404,16 @@ int iwch_register_device(struct iwch_dev *dev)
sizeof(dev->ibdev.iwcm->ifname));
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
- ret = ib_register_device(&dev->ibdev, NULL);
+ rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
+ ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
if (ret)
- goto bail1;
-
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
- ret = device_create_file(&dev->ibdev.dev,
- iwch_class_attributes[i]);
- if (ret) {
- goto bail2;
- }
- }
- return 0;
-bail2:
- ib_unregister_device(&dev->ibdev);
-bail1:
- kfree(dev->ibdev.iwcm);
+ kfree(dev->ibdev.iwcm);
return ret;
}
void iwch_unregister_device(struct iwch_dev *dev)
{
- int i;
-
pr_debug("%s iwch_dev %p\n", __func__, dev);
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
- device_remove_file(&dev->ibdev.dev,
- iwch_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm);
return;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0f83cbec33f3..615413bd3e8d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref)
ep->com.local_addr.ss_family);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
- if (ep->mpa_skb)
- kfree_skb(ep->mpa_skb);
+ kfree_skb(ep->mpa_skb);
}
if (!skb_queue_empty(&ep->com.ep_skb_list))
skb_queue_purge(&ep->com.ep_skb_list);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 6d3042794094..1fd8798d91a7 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -161,7 +161,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->gts = rdev->lldi.gts_reg;
cq->rdev = rdev;
- cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
+ cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) {
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 4eda6872e617..cbb3c0ddd990 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -373,8 +373,8 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
return 0;
}
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@@ -382,9 +382,10 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@@ -395,9 +396,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@@ -405,6 +407,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
}
+static DEVICE_ATTR_RO(board_id);
enum counters {
IP4INSEGS,
@@ -461,14 +464,15 @@ static int c4iw_get_mib(struct ib_device *ibdev,
return stats->num_counters;
}
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *c4iw_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *c4iw_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
+static const struct attribute_group c4iw_attr_group = {
+ .attrs = c4iw_class_attributes,
};
static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -530,12 +534,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
void c4iw_register_device(struct work_struct *work)
{
int ret;
- int i;
struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
struct c4iw_dev *dev = ctx->dev;
pr_debug("c4iw_dev %p\n", dev);
- strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
@@ -626,20 +628,13 @@ void c4iw_register_device(struct work_struct *work)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
+ rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
- ret = ib_register_device(&dev->ibdev, NULL);
+ ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
if (ret)
goto err_kfree_iwcm;
-
- for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
- ret = device_create_file(&dev->ibdev.dev,
- c4iw_class_attributes[i]);
- if (ret)
- goto err_unregister_device;
- }
return;
-err_unregister_device:
- ib_unregister_device(&dev->ibdev);
+
err_kfree_iwcm:
kfree(dev->ibdev.iwcm);
err_dealloc_ctx:
@@ -651,12 +646,7 @@ err_dealloc_ctx:
void c4iw_unregister_device(struct c4iw_dev *dev)
{
- int i;
-
pr_debug("c4iw_dev %p\n", dev);
- for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
- device_remove_file(&dev->ibdev.dev,
- c4iw_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm);
return;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index b3203afa3b1d..74eb70300fdf 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -279,12 +279,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->db = rdev->lldi.db_reg;
- wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
+ wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
+ CXGB4_BAR2_QTYPE_EGRESS,
&wq->sq.bar2_qid,
user ? &wq->sq.bar2_pa : NULL);
if (need_rq)
wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
- T4_BAR2_QTYPE_EGRESS,
+ CXGB4_BAR2_QTYPE_EGRESS,
&wq->rq.bar2_qid,
user ? &wq->rq.bar2_pa : NULL);
@@ -1685,6 +1686,12 @@ static void flush_qp(struct c4iw_qp *qhp)
schp = to_c4iw_cq(qhp->ibqp.send_cq);
if (qhp->ibqp.uobject) {
+
+ /* for user qps, qhp->wq.flushed is protected by qhp->mutex */
+ if (qhp->wq.flushed)
+ return;
+
+ qhp->wq.flushed = 1;
t4_set_wq_in_error(&qhp->wq, 0);
t4_set_cq_in_error(&rchp->cq);
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
@@ -2566,7 +2573,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
memset(wq->queue, 0, wq->memsize);
pci_unmap_addr_set(wq, mapping, wq->dma_addr);
- wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+ wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
&wq->bar2_qid,
user ? &wq->bar2_pa : NULL);
@@ -2807,8 +2814,7 @@ err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
err_free_skb:
- if (srq->destroy_skb)
- kfree_skb(srq->destroy_skb);
+ kfree_skb(srq->destroy_skb);
err_free_srq_idx:
c4iw_free_srq_idx(&rhp->rdev, srq->idx);
err_free_wr_wait:
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index f451ba912f47..ff790390c91a 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -8,12 +8,42 @@
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
-hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
- eprom.o exp_rcv.o file_ops.o firmware.o \
- init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
- qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
- uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o vnic_main.o vnic_sdma.o
+hfi1-y := \
+ affinity.o \
+ chip.o \
+ device.o \
+ driver.o \
+ efivar.o \
+ eprom.o \
+ exp_rcv.o \
+ file_ops.o \
+ firmware.o \
+ init.o \
+ intr.o \
+ iowait.o \
+ mad.o \
+ mmu_rb.o \
+ msix.o \
+ pcie.o \
+ pio.o \
+ pio_copy.o \
+ platform.o \
+ qp.o \
+ qsfp.o \
+ rc.o \
+ ruc.o \
+ sdma.o \
+ sysfs.o \
+ trace.o \
+ uc.o \
+ ud.o \
+ user_exp_rcv.o \
+ user_pages.o \
+ user_sdma.o \
+ verbs.o \
+ verbs_txreq.o \
+ vnic_main.o \
+ vnic_sdma.o
ifdef CONFIG_DEBUG_FS
hfi1-y += debugfs.o
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index bedd5fba33b0..2baf38cc1e23 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
set = &entry->def_intr;
cpumask_set_cpu(cpu, &set->mask);
cpumask_set_cpu(cpu, &set->used);
- for (i = 0; i < dd->num_msix_entries; i++) {
+ for (i = 0; i < dd->msix_info.max_requested; i++) {
struct hfi1_msix_entry *other_msix;
- other_msix = &dd->msix_entries[i];
+ other_msix = &dd->msix_info.msix_entries[i];
if (other_msix->type != IRQ_SDMA || other_msix == msix)
continue;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 2c19bf772451..9b20479dc710 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -67,8 +67,6 @@
#include "debugfs.h"
#include "fault.h"
-#define NUM_IB_PORTS 1
-
uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@@ -1100,9 +1098,9 @@ struct err_reg_info {
const char *desc;
};
-#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
-#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
-#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
+#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
+#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
+#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
/*
* Helpers for building HFI and DC error interrupt table entries. Different
@@ -6733,6 +6731,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
struct hfi1_devdata *dd = ppd->dd;
struct send_context *sc;
int i;
+ int sc_flags;
if (flags & FREEZE_SELF)
write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
@@ -6743,11 +6742,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
/* notify all SDMA engines that they are going into a freeze */
sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
+ sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ?
+ SCF_LINK_DOWN : 0);
/* do halt pre-handling on all enabled send contexts */
for (i = 0; i < dd->num_send_contexts; i++) {
sc = dd->send_contexts[i].sc;
if (sc && (sc->flags & SCF_ENABLED))
- sc_stop(sc, SCF_FROZEN | SCF_HALTED);
+ sc_stop(sc, sc_flags);
}
/* Send context are frozen. Notify user space */
@@ -8178,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
/**
* is_rcv_urgent_int() - User receive context urgent IRQ handler
* @dd: valid dd
- * @source: logical IRQ source (ofse from IS_RCVURGENT_START)
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
*
* RX block receive urgent interrupt. Source is < 160.
*
@@ -8228,7 +8229,7 @@ static const struct is_table is_table[] = {
is_sdma_eng_err_name, is_sdma_eng_err_int },
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
is_sendctxt_err_name, is_sendctxt_err_int },
-{ IS_SDMA_START, IS_SDMA_END,
+{ IS_SDMA_START, IS_SDMA_IDLE_END,
is_sdma_eng_name, is_sdma_eng_int },
{ IS_VARIOUS_START, IS_VARIOUS_END,
is_various_name, is_various_int },
@@ -8254,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
/* avoids a double compare by walking the table in-order */
for (entry = &is_table[0]; entry->is_name; entry++) {
- if (source < entry->end) {
+ if (source <= entry->end) {
trace_hfi1_interrupt(dd, entry, source);
entry->is_int(dd, source - entry->start);
return;
@@ -8273,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
* context DATA IRQs are threaded and are not supported by this handler.
*
*/
-static irqreturn_t general_interrupt(int irq, void *data)
+irqreturn_t general_interrupt(int irq, void *data)
{
struct hfi1_devdata *dd = data;
u64 regs[CCE_NUM_INT_CSRS];
@@ -8306,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
return handled;
}
-static irqreturn_t sdma_interrupt(int irq, void *data)
+irqreturn_t sdma_interrupt(int irq, void *data)
{
struct sdma_engine *sde = data;
struct hfi1_devdata *dd = sde->dd;
@@ -8398,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/
-static irqreturn_t receive_context_interrupt(int irq, void *data)
+irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd;
@@ -8438,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
* Receive packet thread handler. This expects to be invoked with the
* receive interrupt still blocked.
*/
-static irqreturn_t receive_context_thread(int irq, void *data)
+irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
int present;
@@ -9648,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
}
}
-static void init_qsfp_int(struct hfi1_devdata *dd)
+void init_qsfp_int(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd = dd->pport;
- u64 qsfp_mask, cce_int_mask;
- const int qsfp1_int_smask = QSFP1_INT % 64;
- const int qsfp2_int_smask = QSFP2_INT % 64;
-
- /*
- * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
- * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
- * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
- * the index of the appropriate CSR in the CCEIntMask CSR array
- */
- cce_int_mask = read_csr(dd, CCE_INT_MASK +
- (8 * (QSFP1_INT / 64)));
- if (dd->hfi1_id) {
- cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
- cce_int_mask);
- } else {
- cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
- cce_int_mask);
- }
+ u64 qsfp_mask;
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */
@@ -9688,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask);
+
+ /* Enable the appropriate QSFP IRQ source */
+ if (!dd->hfi1_id)
+ set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
+ else
+ set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
}
/*
@@ -10574,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
}
}
-/*
- * Verify if BCT for data VLs is non-zero.
+/**
+ * data_vls_operational() - Verify if data VL BCT credits and MTU
+ * are both set.
+ * @ppd: pointer to hfi1_pportdata structure
+ *
+ * Return: true - Ok, false -otherwise.
*/
static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
{
- return !!ppd->actual_vls_operational;
+ int i;
+ u64 reg;
+
+ if (!ppd->actual_vls_operational)
+ return false;
+
+ for (i = 0; i < ppd->vls_supported; i++) {
+ reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
+ if ((reg && !ppd->dd->vld[i].mtu) ||
+ (!reg && ppd->dd->vld[i].mtu))
+ return false;
+ }
+
+ return true;
}
/*
@@ -10674,6 +10678,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
handle_linkup_change(dd, 1);
+ pio_kernel_linkup(dd);
/*
* After link up, a new link width will have been set.
@@ -10691,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (!data_vls_operational(ppd)) {
dd_dev_err(dd,
- "%s: data VLs not operational\n", __func__);
+ "%s: Invalid data VL credits or mtu\n",
+ __func__);
ret = -EINVAL;
break;
}
@@ -11928,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
}
- if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
+ if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, true);
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
+ }
+ if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
+ }
if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@@ -11961,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
+ if (op & HFI1_RCVCTRL_URGENT_ENB)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, true);
+ if (op & HFI1_RCVCTRL_URGENT_DIS)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, false);
+
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
@@ -12959,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
return ret;
}
+/* ========================================================================= */
+
/**
- * get_int_mask - get 64 bit int mask
- * @dd - the devdata
- * @i - the csr (relative to CCE_INT_MASK)
+ * read_mod_write() - Calculate the IRQ register index and set/clear the bits
+ * @dd: valid devdata
+ * @src: IRQ source to determine register index from
+ * @bits: the bits to set or clear
+ * @set: true == set the bits, false == clear the bits
*
- * Returns the mask with the urgent interrupt mask
- * bit clear for kernel receive contexts.
*/
-static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
+static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
+ bool set)
{
- u64 mask = U64_MAX; /* default to no change */
-
- if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
- int j = (i - (IS_RCVURGENT_START / 64)) * 64;
- int k = !j ? IS_RCVURGENT_START % 64 : 0;
+ u64 reg;
+ u16 idx = src / BITS_PER_REGISTER;
- if (j)
- j -= IS_RCVURGENT_START % 64;
- /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
- for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
- /* convert to bit in mask and clear */
- mask &= ~BIT_ULL(k);
- }
- return mask;
+ spin_lock(&dd->irq_src_lock);
+ reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
+ if (set)
+ reg |= bits;
+ else
+ reg &= ~bits;
+ write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
+ spin_unlock(&dd->irq_src_lock);
}
-/* ========================================================================= */
-
-/*
- * Enable/disable chip from delivering interrupts.
+/**
+ * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
+ * @dd: valid devdata
+ * @first: first IRQ source to set/clear
+ * @last: last IRQ source (inclusive) to set/clear
+ * @set: true == set the bits, false == clear the bits
+ *
+ * If first == last, set the exact source.
*/
-void set_intr_state(struct hfi1_devdata *dd, u32 enable)
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
{
- int i;
+ u64 bits = 0;
+ u64 bit;
+ u16 src;
- /*
- * In HFI, the mask needs to be 1 to allow interrupts.
- */
- if (enable) {
- /* enable all interrupts but urgent on kernel contexts */
- for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
- u64 mask = get_int_mask(dd, i);
+ if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
+ return -EINVAL;
- write_csr(dd, CCE_INT_MASK + (8 * i), mask);
- }
+ if (last < first)
+ return -ERANGE;
- init_qsfp_int(dd);
- } else {
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
+ for (src = first; src <= last; src++) {
+ bit = src % BITS_PER_REGISTER;
+ /* wrapped to next register? */
+ if (!bit && bits) {
+ read_mod_write(dd, src - 1, bits, set);
+ bits = 0;
+ }
+ bits |= BIT_ULL(bit);
}
+ read_mod_write(dd, last, bits, set);
+
+ return 0;
}
/*
* Clear all interrupt sources on the chip.
*/
-static void clear_all_interrupts(struct hfi1_devdata *dd)
+void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13039,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}
-/**
- * hfi1_clean_up_interrupts() - Free all IRQ resources
- * @dd: valid device data data structure
- *
- * Free the MSIx and assoicated PCI resources, if they have been allocated.
- */
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
-{
- int i;
- struct hfi1_msix_entry *me = dd->msix_entries;
-
- /* remove irqs - must happen before disabling/turning off */
- for (i = 0; i < dd->num_msix_entries; i++, me++) {
- if (!me->arg) /* => no irq, no affinity */
- continue;
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, i, me->arg);
- }
-
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
-
- pci_free_irq_vectors(dd->pcidev);
-}
-
/*
* Remap the interrupt source from the general handler to the given MSI-X
* interrupt.
*/
-static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
{
u64 reg;
int m, n;
@@ -13094,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
write_csr(dd, CCE_INT_MAP + (8 * m), reg);
}
-static void remap_sdma_interrupts(struct hfi1_devdata *dd,
- int engine, int msix_intr)
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
{
/*
* SDMA engine interrupt sources grouped by type, rather than
@@ -13104,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
* SDMAProgress
* SDMAIdle
*/
- remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
-}
-
-static int request_msix_irqs(struct hfi1_devdata *dd)
-{
- int first_general, last_general;
- int first_sdma, last_sdma;
- int first_rx, last_rx;
- int i, ret = 0;
-
- /* calculate the ranges we are going to use */
- first_general = 0;
- last_general = first_general + 1;
- first_sdma = last_general;
- last_sdma = first_sdma + dd->num_sdma;
- first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
- dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
-
- /*
- * Sanity check - the code expects all SDMA chip source
- * interrupts to be in the same CSR, starting at bit 0. Verify
- * that this is true by checking the bit location of the start.
- */
- BUILD_BUG_ON(IS_SDMA_START % 64);
-
- for (i = 0; i < dd->num_msix_entries; i++) {
- struct hfi1_msix_entry *me = &dd->msix_entries[i];
- const char *err_info;
- irq_handler_t handler;
- irq_handler_t thread = NULL;
- void *arg = NULL;
- int idx;
- struct hfi1_ctxtdata *rcd = NULL;
- struct sdma_engine *sde = NULL;
- char name[MAX_NAME_SIZE];
-
- /* obtain the arguments to pci_request_irq */
- if (first_general <= i && i < last_general) {
- idx = i - first_general;
- handler = general_interrupt;
- arg = dd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- me->type = IRQ_GENERAL;
- } else if (first_sdma <= i && i < last_sdma) {
- idx = i - first_sdma;
- sde = &dd->per_sdma[idx];
- handler = sdma_interrupt;
- arg = sde;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d sdma%d", dd->unit, idx);
- err_info = "sdma";
- remap_sdma_interrupts(dd, idx, i);
- me->type = IRQ_SDMA;
- } else if (first_rx <= i && i < last_rx) {
- idx = i - first_rx;
- rcd = hfi1_rcd_get_by_index_safe(dd, idx);
- if (rcd) {
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
- rcd->msix_intr = i;
- hfi1_rcd_put(rcd);
- }
- } else {
- /* not in our expected range - complain, then
- * ignore it
- */
- dd_dev_err(dd,
- "Unexpected extra MSI-X interrupt %d\n", i);
- continue;
- }
- /* no argument, no interrupt */
- if (!arg)
- continue;
- /* make sure the name is terminated */
- name[sizeof(name) - 1] = 0;
- me->irq = pci_irq_vector(dd->pcidev, i);
- ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
- name);
- if (ret) {
- dd_dev_err(dd,
- "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
- err_info, me->irq, idx, ret);
- return ret;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret)
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
- }
-
- return ret;
-}
-
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
-{
- int i;
-
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- synchronize_irq(me->irq);
- }
-}
-
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- if (!me->arg) /* => no irq, no affinity */
- return;
-
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
-
- me->arg = NULL;
-}
-
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me;
- int idx = rcd->ctxt;
- void *arg = rcd;
- int ret;
-
- rcd->msix_intr = dd->vnic.msix_idx++;
- me = &dd->msix_entries[rcd->msix_intr];
-
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- me->type = IRQ_RCVCTXT;
- me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
- remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
-
- ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
- receive_context_interrupt,
- receive_context_thread, arg,
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- if (ret) {
- dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
- me->irq, idx, ret);
- return;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret) {
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
- }
+ remap_intr(dd, IS_SDMA_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
}
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
*/
-static void reset_interrupts(struct hfi1_devdata *dd)
+void reset_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13314,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
}
+/**
+ * set_up_interrupts() - Initialize the IRQ resources and state
+ * @dd: valid devdata
+ *
+ */
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- u32 total;
- int ret, request;
-
- /*
- * Interrupt count:
- * 1 general, "slow path" interrupt (includes the SDMA engines
- * slow source, SDMACleanupDone)
- * N interrupts - one per used SDMA engine
- * M interrupt - one per kernel receive context
- * V interrupt - one for each VNIC context
- */
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* ask for MSI-X interrupts */
- request = request_msix(dd, total);
- if (request < 0) {
- ret = request;
- goto fail;
- } else {
- dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
- GFP_KERNEL);
- if (!dd->msix_entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* using MSI-X */
- dd->num_msix_entries = total;
- dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
- }
+ int ret;
/* mask all interrupts */
- set_intr_state(dd, 0);
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+
/* clear all pending interrupts */
clear_all_interrupts(dd);
/* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd);
- ret = request_msix_irqs(dd);
+ /* ask for MSI-X interrupts */
+ ret = msix_initialize(dd);
if (ret)
- goto fail;
+ return ret;
- return 0;
+ ret = msix_request_irqs(dd);
+ if (ret)
+ msix_clean_up_interrupts(dd);
-fail:
- hfi1_clean_up_interrupts(dd);
return ret;
}
@@ -14914,20 +14704,16 @@ err_exit:
}
/**
- * Allocate and initialize the device structure for the hfi.
+ * hfi1_init_dd() - Initialize most of the dd structure.
* @dev: the pci_dev for hfi1_ib device
* @ent: pci_device_id struct for this dev
*
- * Also allocates, initializes, and returns the devdata struct for this
- * device instance
- *
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+int hfi1_init_dd(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd;
+ struct pci_dev *pdev = dd->pcidev;
struct hfi1_pportdata *ppd;
u64 reg;
int i, ret;
@@ -14938,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"Functional simulator"
};
struct pci_dev *parent = pdev->bus->self;
- u32 sdma_engines;
+ u32 sdma_engines = chip_sdma_engines(dd);
- dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
- sizeof(struct hfi1_pportdata));
- if (IS_ERR(dd))
- goto bail;
- sdma_engines = chip_sdma_engines(dd);
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl;
@@ -15123,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /*
+ * This should probably occur in hfi1_pcie_init(), but historically
+ * occurs after the do_pcie_gen3_transition() code.
+ */
+ tune_pcie_caps(dd);
+
/* start setting dd values and adjusting CSRs */
init_early_variables(dd);
@@ -15235,14 +15022,13 @@ bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
hfi1_comp_vectors_clean_up(dd);
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
- dd = ERR_PTR(ret);
bail:
- return dd;
+ return ret;
}
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 36b04d6300e5..6b9c8f12dff8 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -52,9 +52,7 @@
*/
/* sizes */
-#define CCE_NUM_MSIX_VECTORS 256
-#define CCE_NUM_INT_CSRS 12
-#define CCE_NUM_INT_MAP_CSRS 96
+#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
#define NUM_INTERRUPT_SOURCES 768
#define RXE_NUM_CONTEXTS 160
#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
@@ -161,34 +159,49 @@
(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
-/* interrupt source numbers */
-#define IS_GENERAL_ERR_START 0
-#define IS_SDMAENG_ERR_START 16
-#define IS_SENDCTXT_ERR_START 32
-#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */
+/* Specific IRQ sources */
+#define CCE_ERR_INT 0
+#define RXE_ERR_INT 1
+#define MISC_ERR_INT 2
+#define PIO_ERR_INT 4
+#define SDMA_ERR_INT 5
+#define EGRESS_ERR_INT 6
+#define TXE_ERR_INT 7
+#define PBC_INT 240
+#define GPIO_ASSERT_INT 241
+#define QSFP1_INT 242
+#define QSFP2_INT 243
+#define TCRIT_INT 244
+
+/* interrupt source ranges */
+#define IS_FIRST_SOURCE CCE_ERR_INT
+#define IS_GENERAL_ERR_START 0
+#define IS_SDMAENG_ERR_START 16
+#define IS_SENDCTXT_ERR_START 32
+#define IS_SDMA_START 192
+#define IS_SDMA_PROGRESS_START 208
+#define IS_SDMA_IDLE_START 224
#define IS_VARIOUS_START 240
#define IS_DC_START 248
#define IS_RCVAVAIL_START 256
#define IS_RCVURGENT_START 416
#define IS_SENDCREDIT_START 576
#define IS_RESERVED_START 736
-#define IS_MAX_SOURCES 768
+#define IS_LAST_SOURCE 767
/* derived interrupt source values */
-#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START
-#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START
-#define IS_SENDCTXT_ERR_END IS_SDMA_START
-#define IS_SDMA_END IS_VARIOUS_START
-#define IS_VARIOUS_END IS_DC_START
-#define IS_DC_END IS_RCVAVAIL_START
-#define IS_RCVAVAIL_END IS_RCVURGENT_START
-#define IS_RCVURGENT_END IS_SENDCREDIT_START
-#define IS_SENDCREDIT_END IS_RESERVED_START
-#define IS_RESERVED_END IS_MAX_SOURCES
-
-/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
-#define QSFP1_INT 242
-#define QSFP2_INT 243
+#define IS_GENERAL_ERR_END 7
+#define IS_SDMAENG_ERR_END 31
+#define IS_SENDCTXT_ERR_END 191
+#define IS_SDMA_END 207
+#define IS_SDMA_PROGRESS_END 223
+#define IS_SDMA_IDLE_END 239
+#define IS_VARIOUS_END 244
+#define IS_DC_END 255
+#define IS_RCVAVAIL_END 415
+#define IS_RCVURGENT_END 575
+#define IS_SENDCREDIT_END 735
+#define IS_RESERVED_END IS_LAST_SOURCE
/* DCC_CFG_PORT_CONFIG logical link states */
#define LSTATE_DOWN 0x1
@@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
+irqreturn_t general_interrupt(int irq, void *data);
+irqreturn_t sdma_interrupt(int irq, void *data);
+irqreturn_t receive_context_interrupt(int irq, void *data);
+irqreturn_t receive_context_thread(int irq, void *data);
+
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
+void init_qsfp_int(struct hfi1_devdata *dd);
+void clear_all_interrupts(struct hfi1_devdata *dd);
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
+void reset_interrupts(struct hfi1_devdata *dd);
+
/*
* Interrupt source table.
*
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ee6dca5e2a2f..c6163a347e93 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -878,6 +878,10 @@
#define SEND_CTRL (TXE + 0x000000000000)
#define SEND_CTRL_CM_RESET_SMASK 0x4ull
#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
+#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
+#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
+#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
+ << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1fc75647e47b..c22ebc774a6a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS |
+ HFI1_RCVCTRL_URGENT_DIS, uctxt);
/* Clear the context's J_KEY */
hfi1_clear_ctxt_jkey(dd, uctxt);
/*
@@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index d9470317983f..1401b6ea4a28 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -80,6 +80,7 @@
#include "qsfp.h"
#include "platform.h"
#include "affinity.h"
+#include "msix.h"
/* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U
@@ -620,6 +621,8 @@ struct rvt_sge_state;
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
+#define HFI1_RCVCTRL_URGENT_ENB 0x40000
+#define HFI1_RCVCTRL_URGENT_DIS 0x80000
/* partition enforcement flags */
#define HFI1_PART_ENFORCE_IN 0x1
@@ -667,6 +670,14 @@ struct hfi1_msix_entry {
struct irq_affinity_notify notify;
};
+struct hfi1_msix_info {
+ /* lock to synchronize in_use_msix access */
+ spinlock_t msix_lock;
+ DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
+ struct hfi1_msix_entry *msix_entries;
+ u16 max_requested;
+};
+
/* per-SL CCA information */
struct cca_timer {
struct hrtimer hrtimer;
@@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
struct idr vesw_idr;
u8 rmt_start;
u8 num_ctxt;
- u32 msix_idx;
};
struct hfi1_vnic_vport_info;
@@ -1205,11 +1215,6 @@ struct hfi1_devdata {
struct diag_client *diag_client;
- /* MSI-X information */
- struct hfi1_msix_entry *msix_entries;
- u32 num_msix_entries;
- u32 first_dyn_msix_idx;
-
/* general interrupt: mask of handled interrupts */
u64 gi_mask[CCE_NUM_INT_CSRS];
@@ -1223,6 +1228,9 @@ struct hfi1_devdata {
*/
struct timer_list synth_stats_timer;
+ /* MSI-X information */
+ struct hfi1_msix_info msix_info;
+
/*
* device counters
*/
@@ -1349,6 +1357,8 @@ struct hfi1_devdata {
/* vnic data */
struct hfi1_vnic_data vnic;
+ /* Lock to protect IRQ SRC register access */
+ spinlock_t irq_src_lock;
};
static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
@@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd);
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
void hfi1_make_ud_req_9B(struct rvt_qp *qp,
@@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent);
+int hfi1_init_dd(struct hfi1_devdata *dd);
void hfi1_free_devdata(struct hfi1_devdata *dd);
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
*/
extern const char ib_hfi1_version[];
+extern const struct attribute_group ib_hfi1_attr_group;
int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd);
@@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
+int hfi1_pcie_init(struct hfi1_devdata *dd);
void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *dd);
-int request_msix(struct hfi1_devdata *dd, u32 msireq);
int restore_pci_variables(struct hfi1_devdata *dd);
int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
+void tune_pcie_caps(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
enum platform_config_table_type_encoding
@@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
return base_sdma_integrity;
}
-/*
- * hfi1_early_err is used (only!) to print early errors before devdata is
- * allocated, or when dd->pcidev may not be valid, and at the tail end of
- * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
- * the same as dd_dev_err, but is used when the message really needs
- * the IB port# to be definitive as to what's happening..
- */
-#define hfi1_early_err(dev, fmt, ...) \
- dev_err(dev, fmt, ##__VA_ARGS__)
-
-#define hfi1_early_info(dev, fmt, ...) \
- dev_info(dev, fmt, ##__VA_ARGS__)
-
#define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 758d273c32cf..09044905284f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -83,6 +83,8 @@
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
+#define NUM_IB_PORTS 1
+
/*
* Number of user receive contexts we are configured to use (to allow for more
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
@@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
if (loopback) {
- hfi1_early_err(&pdev->dev,
- "Faking data partition 0x8001 in idx %u\n",
- !default_pkey_idx);
+ dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
+ !default_pkey_idx);
ppd->pkeys[!default_pkey_idx] = 0x8001;
}
@@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
return;
bail:
-
- hfi1_early_err(&pdev->dev,
- "Congestion Control Agent disabled for port %d\n", port);
+ dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
}
/*
@@ -833,6 +832,23 @@ wq_error:
}
/**
+ * enable_general_intr() - Enable the IRQs that will be handled by the
+ * general interrupt handler.
+ * @dd: valid devdata
+ *
+ */
+static void enable_general_intr(struct hfi1_devdata *dd)
+{
+ set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
+ set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
+ set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
+ set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
+ set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
+ set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
+ set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
+}
+
+/**
* hfi1_init - do the actual initialization sequence on the chip
* @dd: the hfi1_ib device
* @reinit: re-initializing, so don't allocate new memory
@@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
ret = lastfail;
}
+ /* enable IRQ */
hfi1_rcd_put(rcd);
}
@@ -954,7 +971,8 @@ done:
HFI1_STATUS_INITTED;
if (!ret) {
/* enable all interrupts from the chip */
- set_intr_state(dd, 1);
+ enable_general_intr(dd);
+ init_qsfp_int(dd);
/* chip is OK for user apps; mark it as initialized */
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
}
dd->flags &= ~HFI1_INITTED;
- /* mask and clean up interrupts, but not errors */
- set_intr_state(dd, 0);
- hfi1_clean_up_interrupts(dd);
+ /* mask and clean up interrupts */
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+ msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
kobject_put(&dd->kobj);
}
-/*
- * Allocate our primary per-unit data structure. Must be done via verbs
- * allocator, because the verbs cleanup process both does cleanup and
- * free of the data structure.
+/**
+ * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
+ * @pdev: Valid PCI device
+ * @extra: How many bytes to alloc past the default
+ *
+ * Must be done via verbs allocator, because the verbs cleanup process
+ * both does cleanup and free of the data structure.
* "extra" is for chip-specific data.
*
* Use the idr mechanism to get a unit number for this unit.
*/
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
+static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+ size_t extra)
{
unsigned long flags;
struct hfi1_devdata *dd;
@@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
idr_preload_end();
if (ret < 0) {
- hfi1_early_err(&pdev->dev,
- "Could not allocate unit ID: error %d\n", -ret);
+ dev_err(&pdev->dev,
+ "Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->pio_map_lock);
mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
+ spin_lock_init(&dd->irq_src_lock);
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
@@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
idr_init(&hfi1_unit_table);
hfi1_dbg_init();
- ret = hfi1_wss_init();
- if (ret < 0)
- goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
@@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
goto bail; /* all OK */
bail_dev:
- hfi1_wss_exit();
-bail_wss:
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
dev_cleanup();
@@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
node_affinity_destroy_all();
- hfi1_wss_exit();
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
@@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
{
if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev, "Receive header queue count too small\n");
+ dd_dev_err(dd, "Receive header queue count too small\n");
return -EINVAL;
}
if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
return -EINVAL;
}
if (thecnt % HDRQ_INCREMENT) {
- hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
return -EINVAL;
}
@@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Validate dev ids */
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
+ dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
ret = -ENODEV;
goto bail;
}
+ /* Allocate the dd so we can get to work */
+ dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+ sizeof(struct hfi1_pportdata));
+ if (IS_ERR(dd)) {
+ ret = PTR_ERR(dd);
+ goto bail;
+ }
+
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+ ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
- hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
- hfi1_hdrq_entsize);
+ dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
+ hfi1_hdrq_entsize);
ret = -EINVAL;
goto bail;
}
@@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
clamp_val(eager_buffer_size,
MIN_EAGER_BUFFER * 8,
MAX_EAGER_BUFFER_TOTAL);
- hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
- eager_buffer_size);
+ dd_dev_info(dd, "Eager buffer size %u\n",
+ eager_buffer_size);
} else {
- hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
+ dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
ret = -EINVAL;
goto bail;
}
@@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* restrict value of hfi1_rcvarr_split */
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
- ret = hfi1_pcie_init(pdev, ent);
+ ret = hfi1_pcie_init(dd);
if (ret)
goto bail;
@@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* Do device-specific initialization, function table setup, dd
* allocation, etc.
*/
- dd = hfi1_init_dd(pdev, ent);
-
- if (IS_ERR(dd)) {
- ret = PTR_ERR(dd);
+ ret = hfi1_init_dd(dd);
+ if (ret)
goto clean_bail; /* error already printed */
- }
ret = create_workqueues(dd);
if (ret)
@@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
if (initfail || ret) {
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
stop_timers(dd);
flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
new file mode 100644
index 000000000000..582f1ba136ff
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#include "iowait.h"
+#include "trace_iowait.h"
+
+void iowait_set_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_set(wait, flag);
+ set_bit(flag, &wait->flags);
+}
+
+bool iowait_flag_set(struct iowait *wait, u32 flag)
+{
+ return test_bit(flag, &wait->flags);
+}
+
+inline void iowait_clear_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_clear(wait, flag);
+ clear_bit(flag, &wait->flags);
+}
+
+/**
+ * iowait_init() - initialize wait structure
+ * @wait: wait struct to initialize
+ * @tx_limit: limit for overflow queuing
+ * @func: restart function for workqueue
+ * @sleep: sleep function for no space
+ * @resume: wakeup function for no space
+ *
+ * This function initializes the iowait
+ * structure embedded in the QP or PQ.
+ *
+ */
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait))
+{
+ int i;
+
+ wait->count = 0;
+ INIT_LIST_HEAD(&wait->list);
+ init_waitqueue_head(&wait->wait_dma);
+ init_waitqueue_head(&wait->wait_pio);
+ atomic_set(&wait->sdma_busy, 0);
+ atomic_set(&wait->pio_busy, 0);
+ wait->tx_limit = tx_limit;
+ wait->sleep = sleep;
+ wait->wakeup = wakeup;
+ wait->sdma_drained = sdma_drained;
+ wait->flags = 0;
+ for (i = 0; i < IOWAIT_SES; i++) {
+ wait->wait[i].iow = wait;
+ INIT_LIST_HEAD(&wait->wait[i].tx_head);
+ if (i == IOWAIT_IB_SE)
+ INIT_WORK(&wait->wait[i].iowork, func);
+ else
+ INIT_WORK(&wait->wait[i].iowork, tidfunc);
+ }
+}
+
+/**
+ * iowait_cancel_work - cancel all work in iowait
+ * @w: the iowait struct
+ */
+void iowait_cancel_work(struct iowait *w)
+{
+ cancel_work_sync(&iowait_get_ib_work(w)->iowork);
+ cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+}
+
+/**
+ * iowait_set_work_flag - set work flag based on leg
+ * @w - the iowait work struct
+ */
+int iowait_set_work_flag(struct iowait_work *w)
+{
+ if (w == &w->iow->wait[IOWAIT_IB_SE]) {
+ iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
+ return IOWAIT_IB_SE;
+ }
+ iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
+ return IOWAIT_TID_SE;
+}
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 3d9c32c7c340..23a58ac0d47c 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_IOWAIT_H
#define _HFI1_IOWAIT_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
+#include <linux/wait.h>
#include <linux/sched.h>
#include "sdma_txreq.h"
@@ -59,16 +60,47 @@
*/
typedef void (*restart_t)(struct work_struct *work);
+#define IOWAIT_PENDING_IB 0x0
+#define IOWAIT_PENDING_TID 0x1
+
+/*
+ * A QP can have multiple Send Engines (SEs).
+ *
+ * The current use case is for supporting a TID RDMA
+ * packet build/xmit mechanism independent from verbs.
+ */
+#define IOWAIT_SES 2
+#define IOWAIT_IB_SE 0
+#define IOWAIT_TID_SE 1
+
struct sdma_txreq;
struct sdma_engine;
/**
- * struct iowait - linkage for delayed progress/waiting
+ * @iowork: the work struct
+ * @tx_head: list of prebuilt packets
+ * @iow: the parent iowait structure
+ *
+ * This structure is the work item (process) specific
+ * details associated with the each of the two SEs of the
+ * QP.
+ *
+ * The workstruct and the queued TXs are unique to each
+ * SE.
+ */
+struct iowait;
+struct iowait_work {
+ struct work_struct iowork;
+ struct list_head tx_head;
+ struct iowait *iow;
+};
+
+/**
* @list: used to add/insert into QP/PQ wait lists
- * @lock: uses to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
+ * @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
* @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ struct sdma_engine;
* @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
+ * @flags: wait flags (one per QP)
+ * @wait: SE array
*
* This is to be embedded in user's state structure
* (QP or PQ).
@@ -98,13 +132,11 @@ struct sdma_engine;
* Waiters explicity know that, but the destroy
* code that unwaits QPs does not.
*/
-
struct iowait {
struct list_head list;
- struct list_head tx_head;
int (*sleep)(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent
@@ -112,7 +144,6 @@ struct iowait {
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
- struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
atomic_t sdma_busy;
@@ -121,63 +152,37 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
+ unsigned long flags;
+ struct iowait_work wait[IOWAIT_SES];
};
#define SDMA_AVAIL_REASON 0
-/**
- * iowait_init() - initialize wait structure
- * @wait: wait struct to initialize
- * @tx_limit: limit for overflow queuing
- * @func: restart function for workqueue
- * @sleep: sleep function for no space
- * @resume: wakeup function for no space
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
+void iowait_set_flag(struct iowait *wait, u32 flag);
+bool iowait_flag_set(struct iowait *wait, u32 flag);
+void iowait_clear_flag(struct iowait *wait, u32 flag);
-static inline void iowait_init(
- struct iowait *wait,
- u32 tx_limit,
- void (*func)(struct work_struct *work),
- int (*sleep)(
- struct sdma_engine *sde,
- struct iowait *wait,
- struct sdma_txreq *tx,
- uint seq,
- bool pkts_sent),
- void (*wakeup)(struct iowait *wait, int reason),
- void (*sdma_drained)(struct iowait *wait))
-{
- wait->count = 0;
- wait->lock = NULL;
- INIT_LIST_HEAD(&wait->list);
- INIT_LIST_HEAD(&wait->tx_head);
- INIT_WORK(&wait->iowork, func);
- init_waitqueue_head(&wait->wait_dma);
- init_waitqueue_head(&wait->wait_pio);
- atomic_set(&wait->sdma_busy, 0);
- atomic_set(&wait->pio_busy, 0);
- wait->tx_limit = tx_limit;
- wait->sleep = sleep;
- wait->wakeup = wakeup;
- wait->sdma_drained = sdma_drained;
-}
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait));
/**
- * iowait_schedule() - initialize wait structure
+ * iowait_schedule() - schedule the default send engine work
* @wait: wait struct to schedule
* @wq: workqueue for schedule
* @cpu: cpu
*/
-static inline void iowait_schedule(
- struct iowait *wait,
- struct workqueue_struct *wq,
- int cpu)
+static inline bool iowait_schedule(struct iowait *wait,
+ struct workqueue_struct *wq, int cpu)
{
- queue_work_on(cpu, wq, &wait->iowork);
+ return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}
/**
@@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
*/
static inline int iowait_sdma_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->sdma_busy);
}
@@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
}
/**
- * iowait_sdma_dec - note pio complete
+ * iowait_pio_dec - note pio complete
* @wait: iowait structure
*/
static inline int iowait_pio_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->pio_busy);
}
@@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
/**
* iowait_get_txhead() - get packet off of iowait list
*
- * @wait wait struture
+ * @wait iowait_work struture
*/
-static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
{
struct sdma_txreq *tx = NULL;
@@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx;
}
+static inline u16 iowait_get_desc(struct iowait_work *w)
+{
+ u16 num_desc = 0;
+ struct sdma_txreq *tx = NULL;
+
+ if (!list_empty(&w->tx_head)) {
+ tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+ list);
+ num_desc = tx->num_desc;
+ }
+ return num_desc;
+}
+
+static inline u32 iowait_get_all_desc(struct iowait *w)
+{
+ u32 num_desc = 0;
+
+ num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
+ num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
+ return num_desc;
+}
+
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
}
/**
- * iowait_packet_queued() - determine if a packet is already built
- * @wait: the wait structure
+ * iowait_packet_queued() - determine if a packet is queued
+ * @wait: the iowait_work structure
*/
-static inline bool iowait_packet_queued(struct iowait *wait)
+static inline bool iowait_packet_queued(struct iowait_work *wait)
{
return !list_empty(&wait->tx_head);
}
+/**
+ * inc_wait_count - increment wait counts
+ * @w: the log work struct
+ * @n: the count
+ */
+static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
+{
+ if (!w)
+ return;
+ w->iow->tx_count++;
+ w->iow->count += n;
+}
+
+/**
+ * iowait_get_tid_work - return iowait_work for tid SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_TID_SE];
+}
+
+/**
+ * iowait_get_ib_work - return iowait_work for ib SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_IB_SE];
+}
+
+/**
+ * iowait_ioww_to_iow - return iowait given iowait_work
+ * @w: the iowait_work struct
+ */
+static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
+{
+ if (likely(w))
+ return w->iow;
+ return NULL;
+}
+
+void iowait_cancel_work(struct iowait *w);
+int iowait_set_work_flag(struct iowait_work *w);
+
#endif
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 0307405491e0..88a0cf930136 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
int ret;
int pkey_idx;
int local_mad = 0;
- u32 resp_len = 0;
+ u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
struct hfi1_ibport *ibp = to_iport(ibdev, port);
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
new file mode 100644
index 000000000000..d920b165d696
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+
+/**
+ * msix_initialize() - Calculate, request and configure MSIx IRQs
+ * @dd: valid hfi1 devdata
+ *
+ */
+int msix_initialize(struct hfi1_devdata *dd)
+{
+ u32 total;
+ int ret;
+ struct hfi1_msix_entry *entries;
+
+ /*
+ * MSIx interrupt count:
+ * one for the general, "slow path" interrupt
+ * one per used SDMA engine
+ * one per kernel receive context
+ * one for each VNIC context
+ * ...any new IRQs should be added here.
+ */
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+
+ if (total >= CCE_NUM_MSIX_VECTORS)
+ return -EINVAL;
+
+ ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
+ return ret;
+ }
+
+ entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
+ GFP_KERNEL);
+ if (!entries) {
+ pci_free_irq_vectors(dd->pcidev);
+ return -ENOMEM;
+ }
+
+ dd->msix_info.msix_entries = entries;
+ spin_lock_init(&dd->msix_info.msix_lock);
+ bitmap_zero(dd->msix_info.in_use_msix, total);
+ dd->msix_info.max_requested = total;
+ dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
+
+ return 0;
+}
+
+/**
+ * msix_request_irq() - Allocate a free MSIx IRQ
+ * @dd: valid devdata
+ * @arg: context information for the IRQ
+ * @handler: IRQ handler
+ * @thread: IRQ thread handler (could be NULL)
+ * @idx: zero base idx if multiple devices are needed
+ * @type: affinty IRQ type
+ *
+ * Allocated an MSIx vector if available, and then create the appropriate
+ * meta data needed to keep track of the pci IRQ request.
+ *
+ * Return:
+ * < 0 Error
+ * >= 0 MSIx vector
+ *
+ */
+static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
+ irq_handler_t handler, irq_handler_t thread,
+ u32 idx, enum irq_type type)
+{
+ unsigned long nr;
+ int irq;
+ int ret;
+ const char *err_info;
+ char name[MAX_NAME_SIZE];
+ struct hfi1_msix_entry *me;
+
+ /* Allocate an MSIx vector */
+ spin_lock(&dd->msix_info.msix_lock);
+ nr = find_first_zero_bit(dd->msix_info.in_use_msix,
+ dd->msix_info.max_requested);
+ if (nr < dd->msix_info.max_requested)
+ __set_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+
+ if (nr == dd->msix_info.max_requested)
+ return -ENOSPC;
+
+ /* Specific verification and determine the name */
+ switch (type) {
+ case IRQ_GENERAL:
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+ dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
+ nr);
+ return -EINVAL;
+ }
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ err_info = "general";
+ break;
+ case IRQ_SDMA:
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ dd->unit, idx);
+ err_info = "sdma";
+ break;
+ case IRQ_RCVCTXT:
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ break;
+ case IRQ_OTHER:
+ default:
+ return -EINVAL;
+ }
+ name[sizeof(name) - 1] = 0;
+
+ irq = pci_irq_vector(dd->pcidev, nr);
+ ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: request for IRQ %d failed, MSIx %d, err %d\n",
+ err_info, irq, idx, ret);
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+ return ret;
+ }
+
+ /*
+ * assign arg after pci_request_irq call, so it will be
+ * cleaned up
+ */
+ me = &dd->msix_info.msix_entries[nr];
+ me->irq = irq;
+ me->arg = arg;
+ me->type = type;
+
+ /* This is a request, so a failure is not fatal */
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret)
+ dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+
+ return nr;
+}
+
+/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ int nr;
+
+ nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
+ receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+ if (nr < 0)
+ return nr;
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
+ rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
+ rcd->msix_intr = nr;
+ remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
+
+ return 0;
+}
+
+/**
+ * msix_request_smda_ira() - Helper for getting SDMA IRQ resources
+ * @sde: valid sdma engine
+ *
+ */
+int msix_request_sdma_irq(struct sdma_engine *sde)
+{
+ int nr;
+
+ nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
+ sde->this_idx, IRQ_SDMA);
+ if (nr < 0)
+ return nr;
+ sde->msix_intr = nr;
+ remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
+
+ return 0;
+}
+
+/**
+ * enable_sdma_src() - Helper to enable SDMA IRQ srcs
+ * @dd: valid devdata structure
+ * @i: index of SDMA engine
+ */
+static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
+{
+ set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
+ set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
+ IS_SDMA_PROGRESS_START + i, true);
+ set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
+ set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
+ true);
+}
+
+/**
+ * msix_request_irqs() - Allocate all MSIx IRQs
+ * @dd: valid devdata structure
+ *
+ * Helper function to request the used MSIx IRQs.
+ *
+ */
+int msix_request_irqs(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+
+ ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < dd->num_sdma; i++) {
+ struct sdma_engine *sde = &dd->per_sdma[i];
+
+ ret = msix_request_sdma_irq(sde);
+ if (ret)
+ return ret;
+ enable_sdma_srcs(sde->dd, i);
+ }
+
+ for (i = 0; i < dd->n_krcv_queues; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
+
+ if (rcd)
+ ret = msix_request_rcd_irq(rcd);
+ hfi1_rcd_put(rcd);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * msix_free_irq() - Free the specified MSIx resources and IRQ
+ * @dd: valid devdata
+ * @msix_intr: MSIx vector to free.
+ *
+ */
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
+{
+ struct hfi1_msix_entry *me;
+
+ if (msix_intr >= dd->msix_info.max_requested)
+ return;
+
+ me = &dd->msix_info.msix_entries[msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ pci_free_irq(dd->pcidev, msix_intr, me->arg);
+
+ me->arg = NULL;
+
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(msix_intr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+}
+
+/**
+ * hfi1_clean_up_msix_interrupts() - Free all MSIx IRQ resources
+ * @dd: valid device data data structure
+ *
+ * Free the MSIx and associated PCI resources, if they have been allocated.
+ */
+void msix_clean_up_interrupts(struct hfi1_devdata *dd)
+{
+ int i;
+ struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
+
+ /* remove irqs - must happen before disabling/turning off */
+ for (i = 0; i < dd->msix_info.max_requested; i++, me++)
+ msix_free_irq(dd, i);
+
+ /* clean structures */
+ kfree(dd->msix_info.msix_entries);
+ dd->msix_info.msix_entries = NULL;
+ dd->msix_info.max_requested = 0;
+
+ pci_free_irq_vectors(dd->pcidev);
+}
+
+/**
+ * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
+ * @dd: valid devdata
+ */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me;
+
+ me = &dd->msix_info.msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->irq);
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
new file mode 100644
index 000000000000..a514881632a4
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MSIX_H
+#define _HFI1_MSIX_H
+
+#include "hfi.h"
+
+/* MSIx interface */
+int msix_initialize(struct hfi1_devdata *dd);
+int msix_request_irqs(struct hfi1_devdata *dd);
+void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
+int msix_request_sdma_irq(struct sdma_engine *sde);
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
+
+/* VNIC interface */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
+
+#endif
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index eec83757d55f..cb7fc9fb3c9e 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -61,19 +61,12 @@
*/
/*
- * Code to adjust PCIe capabilities.
- */
-static void tune_pcie_caps(struct hfi1_devdata *);
-
-/*
* Do all the common PCIe setup and initialization.
- * devdata is not yet allocated, and is not allocated until after this
- * routine returns success. Therefore dd_dev_err() can't be used for error
- * printing.
*/
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
+int hfi1_pcie_init(struct hfi1_devdata *dd)
{
int ret;
+ struct pci_dev *pdev = dd->pcidev;
ret = pci_enable_device(pdev);
if (ret) {
@@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
* about that, it appears. If the original BAR was retained
* in the kernel data structures, this may be OK.
*/
- hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n",
- -ret);
- goto done;
+ dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
+ return ret;
}
ret = pci_request_regions(pdev, DRIVER_NAME);
if (ret) {
- hfi1_early_err(&pdev->dev,
- "pci_request_regions fails: err %d\n", -ret);
+ dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
goto bail;
}
@@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA mask: %d\n", ret);
+ dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
goto bail;
}
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
@@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
}
if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA consistent mask: %d\n", ret);
+ dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
goto bail;
}
pci_set_master(pdev);
(void)pci_enable_pcie_error_reporting(pdev);
- goto done;
+ return 0;
bail:
hfi1_pcie_cleanup(pdev);
-done:
return ret;
}
@@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
dd_dev_err(dd, "WC mapping of send buffers failed\n");
goto nomem;
}
- dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
+ dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
dd->physaddr = addr; /* used for io_remap, etc. */
@@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
-/*
- * Returns:
- * - actual number of interrupts allocated or
- * - error
- */
-int request_msix(struct hfi1_devdata *dd, u32 msireq)
-{
- int nvec;
-
- nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
- if (nvec < 0) {
- dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
- return nvec;
- }
-
- tune_pcie_caps(dd);
-
- return nvec;
-}
-
/* restore command and BARs after a reset has wiped them out */
int restore_pci_variables(struct hfi1_devdata *dd)
{
@@ -479,14 +447,19 @@ error:
* Check and optionally adjust them to maximize our throughput.
*/
static int hfi1_pcie_caps;
-module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
+module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
uint aspm_mode = ASPM_MODE_DISABLED;
-module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+module_param_named(aspm, aspm_mode, uint, 0444);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-static void tune_pcie_caps(struct hfi1_devdata *dd)
+/**
+ * tune_pcie_caps() - Code to adjust PCIe capabilities.
+ * @dd: Valid device data structure
+ *
+ */
+void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -1032,6 +1005,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
const u8 (*ctle_tunings)[4];
uint static_ctle_mode;
int return_error = 0;
+ u32 target_width;
/* PCIe Gen3 is for the ASIC only */
if (dd->icode != ICODE_RTL_SILICON)
@@ -1071,6 +1045,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
return 0;
}
+ /* Previous Gen1/Gen2 bus width */
+ target_width = dd->lbus_width;
+
/*
* Do the Gen3 transition. Steps are those of the PCIe Gen3
* recipe.
@@ -1439,11 +1416,12 @@ retry:
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
dd->lbus_info);
- if (dd->lbus_speed != target_speed) { /* not target */
+ if (dd->lbus_speed != target_speed ||
+ dd->lbus_width < target_width) { /* not target */
/* maybe retry */
do_retry = retry_count < pcie_retry;
- dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
- pcie_target, do_retry ? ", retrying" : "");
+ dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
+ do_retry ? ", retrying" : "");
retry_count++;
if (do_retry) {
msleep(100); /* allow time to settle */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index c2c1cba5b23b..9ab50d2308dc 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
}
}
-/* defined in header release 48 and higher */
-#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
-#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
-#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
-#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
- << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
-#endif
-
/* global control of PIO send */
void pio_send_control(struct hfi1_devdata *dd, int op)
{
@@ -86,6 +78,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
unsigned long flags;
int write = 1; /* write sendctrl back */
int flush = 0; /* re-read sendctrl to make sure it is flushed */
+ int i;
spin_lock_irqsave(&dd->sendctrl_lock, flags);
@@ -95,9 +88,13 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
reg |= SEND_CTRL_SEND_ENABLE_SMASK;
/* Fall through */
case PSC_DATA_VL_ENABLE:
+ mask = 0;
+ for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
+ if (!dd->vld[i].mtu)
+ mask |= BIT_ULL(i);
/* Disallow sending on VLs not enabled */
- mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
- SEND_CTRL_UNSUPPORTED_VL_SHIFT;
+ mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
+ SEND_CTRL_UNSUPPORTED_VL_SHIFT;
reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
break;
case PSC_GLOBAL_DISABLE:
@@ -921,20 +918,18 @@ void sc_free(struct send_context *sc)
void sc_disable(struct send_context *sc)
{
u64 reg;
- unsigned long flags;
struct pio_buf *pbuf;
if (!sc)
return;
/* do all steps, even if already disabled */
- spin_lock_irqsave(&sc->alloc_lock, flags);
+ spin_lock_irq(&sc->alloc_lock);
reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
sc->flags &= ~SCF_ENABLED;
sc_wait_for_packet_egress(sc, 1);
write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
- spin_unlock_irqrestore(&sc->alloc_lock, flags);
/*
* Flush any waiters. Once the context is disabled,
@@ -944,7 +939,7 @@ void sc_disable(struct send_context *sc)
* proceed with the flush.
*/
udelay(1);
- spin_lock_irqsave(&sc->release_lock, flags);
+ spin_lock(&sc->release_lock);
if (sc->sr) { /* this context has a shadow ring */
while (sc->sr_tail != sc->sr_head) {
pbuf = &sc->sr[sc->sr_tail].pbuf;
@@ -955,7 +950,8 @@ void sc_disable(struct send_context *sc)
sc->sr_tail = 0;
}
}
- spin_unlock_irqrestore(&sc->release_lock, flags);
+ spin_unlock(&sc->release_lock);
+ spin_unlock_irq(&sc->alloc_lock);
}
/* return SendEgressCtxtStatus.PacketOccupancy */
@@ -1178,11 +1174,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd)
sc = dd->send_contexts[i].sc;
if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
continue;
+ if (sc->flags & SCF_LINK_DOWN)
+ continue;
sc_enable(sc); /* will clear the sc frozen flag */
}
}
+/**
+ * pio_kernel_linkup() - Re-enable send contexts after linkup event
+ * @dd: valid devive data
+ *
+ * When the link goes down, the freeze path is taken. However, a link down
+ * event is different from a freeze because if the send context is re-enabled
+ * whowever is sending data will start sending data again, which will hang
+ * any QP that is sending data.
+ *
+ * The freeze path now looks at the type of event that occurs and takes this
+ * path for link down event.
+ */
+void pio_kernel_linkup(struct hfi1_devdata *dd)
+{
+ struct send_context *sc;
+ int i;
+
+ for (i = 0; i < dd->num_send_contexts; i++) {
+ sc = dd->send_contexts[i].sc;
+ if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
+ continue;
+
+ sc_enable(sc); /* will clear the sc link down flag */
+ }
+}
+
/*
* Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
* Returns:
@@ -1382,11 +1406,10 @@ void sc_stop(struct send_context *sc, int flag)
{
unsigned long flags;
- /* mark the context */
- sc->flags |= flag;
-
/* stop buffer allocations */
spin_lock_irqsave(&sc->alloc_lock, flags);
+ /* mark the context */
+ sc->flags |= flag;
sc->flags &= ~SCF_ENABLED;
spin_unlock_irqrestore(&sc->alloc_lock, flags);
wake_up(&sc->halt_wait);
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 058b08f459ab..aaf372c3e5d6 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -139,6 +139,7 @@ struct send_context {
#define SCF_IN_FREE 0x02
#define SCF_HALTED 0x04
#define SCF_FROZEN 0x08
+#define SCF_LINK_DOWN 0x10
struct send_context_info {
struct send_context *sc; /* allocated working context */
@@ -306,6 +307,7 @@ void set_pio_integrity(struct send_context *sc);
void pio_reset_all(struct hfi1_devdata *dd);
void pio_freeze(struct hfi1_devdata *dd);
void pio_kernel_unfreeze(struct hfi1_devdata *dd);
+void pio_kernel_linkup(struct hfi1_devdata *dd);
/* global PIO send control operations */
#define PSC_GLOBAL_ENABLE 0
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9b1e84a6b1cc..6f3bc4dab858 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
unsigned int seq,
bool pkts_sent);
@@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
};
-static void flush_tx_list(struct rvt_qp *qp)
+static void flush_list_head(struct list_head *l)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- while (!list_empty(&priv->s_iowait.tx_head)) {
+ while (!list_empty(l)) {
struct sdma_txreq *tx;
tx = list_first_entry(
- &priv->s_iowait.tx_head,
+ l,
struct sdma_txreq,
list);
list_del_init(&tx->list);
@@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
}
}
+static void flush_tx_list(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
+ flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
+}
+
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
@@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
}
/**
- * hfi1_check_send_wqe - validate wqe
+ * hfi1_setup_wqe - set up the wqe
* @qp - The qp
* @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled.
*
- * validate wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * Perform setup of the wqe. This is called
+ * prior to inserting the wqe into the ring but after
+ * the wqe has been setup by RDMAVT. This function
+ * allows the driver the opportunity to perform
+ * validation and additional setup of the wqe.
*
* Returns 0 on success, -EINVAL on failure
*
*/
-int hfi1_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
- if (wqe->length > (1 << ah->log_pmtu))
+ /*
+ * SM packets should exclusively use VL15 and their SL is
+ * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
+ * is created, SL is 0 in most cases and as a result some
+ * fields (vl and pmtu) in ah may not be set correctly,
+ * depending on the SL2SC and SC2VL tables at the time.
+ */
+ ppd = ppd_from_ibp(ibp);
+ dd = dd_from_ppd(ppd);
+ if (wqe->length > dd->vld[15].mtu)
return -EINVAL;
break;
case IB_QPT_GSI:
@@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
default:
break;
}
- return wqe->length <= piothreshold;
+ return 0;
}
/**
@@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
* It is only used in the post send, which doesn't hold
* the s_lock.
*/
-void _hfi1_schedule_send(struct rvt_qp *qp)
+bool _hfi1_schedule_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp =
@@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
- iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
- priv->s_sde ?
- priv->s_sde->cpu :
- cpumask_first(cpumask_of_node(dd->node)));
+ return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+ priv->s_sde ?
+ priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(dd->node)));
}
static void qp_pio_drain(struct rvt_qp *qp)
@@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
*
* This schedules qp progress and caller should hold
* the s_lock.
+ * @return true if the first leg is scheduled;
+ * false if the first leg is not scheduled.
*/
-void hfi1_schedule_send(struct rvt_qp *qp)
+bool hfi1_schedule_send(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
- if (hfi1_send_ok(qp))
+ if (hfi1_send_ok(qp)) {
_hfi1_schedule_send(qp);
+ return true;
+ }
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+ IOWAIT_PENDING_IB);
+ return false;
+}
+
+static void hfi1_qp_schedule(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ bool ret;
+
+ if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
+ ret = hfi1_schedule_send(qp);
+ if (ret)
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+ }
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
if (qp->s_flags & flag) {
qp->s_flags &= ~flag;
trace_hfi1_qpwakeup(qp, flag);
- hfi1_schedule_send(qp);
+ hfi1_qp_schedule(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
rvt_put_qp(qp);
}
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
+{
+ if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
+ qp->s_flags &= ~RVT_S_BUSY;
+}
+
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
uint seq,
bool pkts_sent)
@@ -438,7 +483,7 @@ static int iowait_sleep(
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
} else {
@@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait,
1,
_hfi1_do_send,
+ NULL,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
@@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- cancel_work_sync(&priv->s_iowait.iowork);
+ iowait_cancel_work(&priv->s_iowait);
}
void quiesce_qp(struct rvt_qp *qp)
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 078cff7560b6..7adb6dff6813 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -58,18 +58,6 @@ extern unsigned int hfi1_qp_table_size;
extern const struct rvt_operation_params hfi1_post_parms[];
/*
- * Send if not busy or waiting for I/O and either
- * a RC response is pending or we can process send work requests.
- */
-static inline int hfi1_send_ok(struct rvt_qp *qp)
-{
- return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
- (verbs_txreq_queued(qp) ||
- (qp->s_flags & RVT_S_RESP_PENDING) ||
- !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
-}
-
-/*
* Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
*
* HFI1_S_AHG_VALID - ahg header valid on chip
@@ -90,6 +78,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
/*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int hfi1_send_ok(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
+ (verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
+ (qp->s_flags & RVT_S_RESP_PENDING) ||
+ !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
+}
+
+/*
* free_ahg - clear ahg from QP
*/
static inline void clear_ahg(struct rvt_qp *qp)
@@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
-void _hfi1_schedule_send(struct rvt_qp *qp);
-void hfi1_schedule_send(struct rvt_qp *qp);
+bool _hfi1_schedule_send(struct rvt_qp *qp);
+bool hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_migrate_qp(struct rvt_qp *qp);
@@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
int mtu_to_path_mtu(u32 mtu);
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
#endif /* _QP_H */
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 9bd63abb2dfe..188aa4f686a0 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done_free_tx;
@@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe,
- err ? IB_WC_LOC_PROT_ERR
- : IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe,
+ err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
goto done_free_tx;
@@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
hfi1_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
} else { /* need to handle delayed completion */
@@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1644,7 +1644,8 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1684,7 +1685,8 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1704,7 +1706,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR;
ack_err:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
@@ -2144,7 +2146,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2200,7 +2202,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 5f56f3c1b4c4..7fb317c711df 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -156,333 +156,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
}
/**
- * ruc_loopback - handle UC and RC loopback requests
- * @sqp: the sending QP
- *
- * This is called from hfi1_do_send() to
- * forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the send engine, we still
- * have to protect against post_send(). We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ruc_loopback(struct rvt_qp *sqp)
-{
- struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct rvt_qp *qp;
- struct rvt_swqe *wqe;
- struct rvt_sge *sge;
- unsigned long flags;
- struct ib_wc wc;
- u64 sdata;
- atomic64_t *maddr;
- enum ib_wc_status send_status;
- bool release;
- int ret;
- bool copy_last = false;
- int local_ops = 0;
-
- rcu_read_lock();
-
- /*
- * Note that we check the responder QP state after
- * checking the requester's state.
- */
- qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
- sqp->remote_qpn);
-
- spin_lock_irqsave(&sqp->s_lock, flags);
-
- /* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
- !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- goto unlock;
-
- sqp->s_flags |= RVT_S_BUSY;
-
-again:
- if (sqp->s_last == READ_ONCE(sqp->s_head))
- goto clr_busy;
- wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
- /* Return if it is not OK to start a new work request. */
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
- goto clr_busy;
- /* We are in the error state, flush the work request. */
- send_status = IB_WC_WR_FLUSH_ERR;
- goto flush_send;
- }
-
- /*
- * We can rely on the entry not changing without the s_lock
- * being held until we update s_last.
- * We increment s_cur to indicate s_last is in progress.
- */
- if (sqp->s_last == sqp->s_cur) {
- if (++sqp->s_cur >= sqp->s_size)
- sqp->s_cur = 0;
- }
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-
- if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
- qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->rvp.n_pkt_drops++;
- /*
- * For RC, the requester would timeout and retry so
- * shortcut the timeouts and just signal too many retries.
- */
- if (sqp->ibqp.qp_type == IB_QPT_RC)
- send_status = IB_WC_RETRY_EXC_ERR;
- else
- send_status = IB_WC_SUCCESS;
- goto serr;
- }
-
- memset(&wc, 0, sizeof(wc));
- send_status = IB_WC_SUCCESS;
-
- release = true;
- sqp->s_sge.sge = wqe->sg_list[0];
- sqp->s_sge.sg_list = wqe->sg_list + 1;
- sqp->s_sge.num_sge = wqe->wr.num_sge;
- sqp->s_len = wqe->length;
- switch (wqe->wr.opcode) {
- case IB_WR_REG_MR:
- goto send_comp;
-
- case IB_WR_LOCAL_INV:
- if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
- if (rvt_invalidate_rkey(sqp,
- wqe->wr.ex.invalidate_rkey))
- send_status = IB_WC_LOC_PROT_ERR;
- local_ops = 1;
- }
- goto send_comp;
-
- case IB_WR_SEND_WITH_INV:
- if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
- wc.wc_flags = IB_WC_WITH_INVALIDATE;
- wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
- }
- goto send;
-
- case IB_WR_SEND_WITH_IMM:
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- /* FALLTHROUGH */
- case IB_WR_SEND:
-send:
- ret = rvt_get_rwqe(qp, false);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- break;
-
- case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = rvt_get_rwqe(qp, true);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- /* skip copy_last set and qp_access_flags recheck */
- goto do_write;
- case IB_WR_RDMA_WRITE:
- copy_last = rvt_is_user_qp(qp);
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
-do_write:
- if (wqe->length == 0)
- break;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_WRITE)))
- goto acc_err;
- qp->r_sge.sg_list = NULL;
- qp->r_sge.num_sge = 1;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
- release = false;
- sqp->s_sge.sg_list = NULL;
- sqp->s_sge.num_sge = 1;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->wr.num_sge;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->atomic_wr.remote_addr,
- wqe->atomic_wr.rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
- goto acc_err;
- /* Perform atomic OP and save result. */
- maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = wqe->atomic_wr.compare_add;
- *(u64 *)sqp->s_sge.sge.vaddr =
- (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
- (u64)atomic64_add_return(sdata, maddr) - sdata :
- (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- sdata, wqe->atomic_wr.swap);
- rvt_put_mr(qp->r_sge.sge.mr);
- qp->r_sge.num_sge = 0;
- goto send_comp;
-
- default:
- send_status = IB_WC_LOC_QP_OP_ERR;
- goto serr;
- }
-
- sge = &sqp->s_sge.sge;
- while (sqp->s_len) {
- u32 len = sqp->s_len;
-
- if (len > sge->length)
- len = sge->length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (!release)
- rvt_put_mr(sge->mr);
- if (--sqp->s_sge.num_sge)
- *sge = *sqp->s_sge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- sqp->s_len -= len;
- }
- if (release)
- rvt_put_ss(&qp->r_sge);
-
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
- goto send_comp;
-
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- else
- wc.opcode = IB_WC_RECV;
- wc.wr_id = qp->r_wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
- wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
- wc.port_num = 1;
- /* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
- spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->rvp.n_loop_pkts++;
-flush_send:
- sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- hfi1_send_complete(sqp, wqe, send_status);
- if (local_ops) {
- atomic_dec(&sqp->local_ops_pending);
- local_ops = 0;
- }
- goto again;
-
-rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- ibp->rvp.n_rnr_naks++;
- /*
- * Note: we don't need the s_lock held since the BUSY flag
- * makes this single threaded.
- */
- if (sqp->s_rnr_retry == 0) {
- send_status = IB_WC_RNR_RETRY_EXC_ERR;
- goto serr;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
- goto clr_busy;
- rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
- IB_AETH_CREDIT_SHIFT);
- goto clr_busy;
-
-op_err:
- send_status = IB_WC_REM_OP_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-inv_err:
- send_status = IB_WC_REM_INV_REQ_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-acc_err:
- send_status = IB_WC_REM_ACCESS_ERR;
- wc.status = IB_WC_LOC_PROT_ERR;
-err:
- /* responder goes to error state */
- rvt_rc_error(qp, wc.status);
-
-serr:
- spin_lock_irqsave(&sqp->s_lock, flags);
- hfi1_send_complete(sqp, wqe, send_status);
- if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
- sqp->s_flags &= ~RVT_S_BUSY;
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = sqp->ibqp.device;
- ev.element.qp = &sqp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
- }
- goto done;
- }
-clr_busy:
- sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
- rcu_read_unlock();
-}
-
-/**
* hfi1_make_grh - construct a GRH header
* @ibp: a pointer to the IB port
* @hdr: a pointer to the GRH header being constructed
@@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
void _hfi1_do_send(struct work_struct *work)
{
- struct iowait *wait = container_of(work, struct iowait, iowork);
- struct rvt_qp *qp = iowait_to_qp(wait);
+ struct iowait_work *w = container_of(work, struct iowait_work, iowork);
+ struct rvt_qp *qp = iowait_to_qp(w->iow);
hfi1_do_send(qp, true);
}
@@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
ps.in_thread = in_thread;
+ ps.wait = iowait_get_ib_work(&priv->s_iowait);
trace_hfi1_rc_do_send(qp, in_thread);
@@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
- ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_rc_req;
@@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
- ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_uc_req;
@@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
/* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) {
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return;
}
@@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.pkts_sent = false;
/* insure a pre-built packet is handled */
- ps.s_txreq = get_waiting_verbs_txreq(qp);
+ ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
do {
/* Check for a constructed packet to be sent. */
if (ps.s_txreq) {
@@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
*/
if (hfi1_verbs_send(qp, &ps))
return;
+
/* allow other tasks to run */
if (schedule_send_yield(qp, &ps))
return;
@@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
-
-/*
- * This should be called with s_lock held.
- */
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status)
-{
- u32 old_last, last;
-
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- return;
-
- last = qp->s_last;
- old_last = last;
- trace_hfi1_qp_send_completion(qp, wqe, last);
- if (++last >= qp->s_size)
- last = 0;
- trace_hfi1_qp_send_completion(qp, wqe, last);
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_swqe(wqe);
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
- rvt_qp_swqe_complete(qp,
- wqe,
- ib_hfi1_wc_opcode[wqe->wr.opcode],
- status);
-
- if (qp->s_acked == old_last)
- qp->s_acked = last;
- if (qp->s_cur == old_last)
- qp->s_cur = last;
- if (qp->s_tail == old_last)
- qp->s_tail = last;
- if (qp->state == IB_QPS_SQD && last == qp->s_cur)
- qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 88e326d6cc49..891d2386d1ca 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
__sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
- if (wait && iowait_sdma_dec(wait))
+ if (iowait_sdma_dec(wait))
iowait_drain_wakeup(wait);
}
@@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
- struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
@@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
nw,
&sde->dmawait,
list) {
- u16 num_desc = 0;
+ u32 num_desc;
if (!wait->wakeup)
continue;
if (n == ARRAY_SIZE(waits))
break;
- if (!list_empty(&wait->tx_head)) {
- stx = list_first_entry(
- &wait->tx_head,
- struct sdma_txreq,
- list);
- num_desc = stx->num_desc;
- }
+ num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
@@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
*/
static int sdma_check_progress(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@@ -2356,12 +2349,12 @@ static int sdma_check_progress(
if (tx->num_desc <= sde->desc_avail)
return -EAGAIN;
/* pulse the head_lock */
- if (wait && wait->sleep) {
+ if (wait && iowait_ioww_to_iow(wait)->sleep) {
unsigned seq;
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
- ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
+ ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@@ -2373,7 +2366,7 @@ static int sdma_check_progress(
/**
* sdma_send_txreq() - submit a tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
* @pkts_sent: has any packet been sent yet?
*
@@ -2386,7 +2379,7 @@ static int sdma_check_progress(
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
/* user should have supplied entire packet */
if (unlikely(tx->tlen))
return -EINVAL;
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
if (unlikely(!__sdma_running(sde)))
@@ -2406,14 +2399,14 @@ retry:
goto nodesc;
tail = submit_tx(sde, tx);
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
sdma_update_tail(sde, tail);
unlock:
spin_unlock_irqrestore(&sde->tail_lock, flags);
return ret;
unlock_noconn:
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
@@ -2422,10 +2415,7 @@ unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist);
spin_unlock(&sde->flushlist_lock);
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
+ iowait_inc_wait_count(wait, tx->num_desc);
schedule_work(&sde->flush_worker);
ret = -ECOMM;
goto unlock;
@@ -2442,9 +2432,9 @@ nodesc:
/**
* sdma_send_txlist() - submit a list of tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
- * @count: pointer to a u32 which, after return will contain the total number of
+ * @count: pointer to a u16 which, after return will contain the total number of
* sdma_txreqs removed from the tx_list. This will include sdma_txreqs
* whose SDMA descriptors are submitted to the ring and the sdma_txreqs
* which are added to SDMA engine flush list if the SDMA engine state is
@@ -2467,8 +2457,8 @@ nodesc:
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
-int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list, u32 *count_out)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
+ struct list_head *tx_list, u16 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
@@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
if (unlikely(!__sdma_running(sde)))
goto unlock_noconn;
if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2500,8 +2490,9 @@ retry:
update_tail:
total_count = submit_count + flush_count;
if (wait) {
- iowait_sdma_add(wait, total_count);
- iowait_starve_clear(submit_count > 0, wait);
+ iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
+ iowait_starve_clear(submit_count > 0,
+ iowait_ioww_to_iow(wait));
}
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
@@ -2511,7 +2502,7 @@ update_tail:
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
list_del_init(&tx->list);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2520,10 +2511,7 @@ unlock_noconn:
#endif
list_add_tail(&tx->list, &sde->flushlist);
flush_count++;
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
+ iowait_inc_wait_count(wait, tx->num_desc);
}
spin_unlock(&sde->flushlist_lock);
schedule_work(&sde->flush_worker);
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 46c775f255d1..6dc63d7c5685 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_SDMA_H
#define _HFI1_SDMA_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -62,16 +62,6 @@
/* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
-#define SDMA_TXREQ_S_OK 0
-#define SDMA_TXREQ_S_SENDERROR 1
-#define SDMA_TXREQ_S_ABORTED 2
-#define SDMA_TXREQ_S_SHUTDOWN 3
-
-/* flags bits */
-#define SDMA_TXREQ_F_URGENT 0x0001
-#define SDMA_TXREQ_F_AHG_COPY 0x0002
-#define SDMA_TXREQ_F_USE_AHG 0x0004
-
#define SDMA_MAP_NONE 0
#define SDMA_MAP_SINGLE 1
#define SDMA_MAP_PAGE 2
@@ -415,6 +405,7 @@ struct sdma_engine {
struct list_head flushlist;
struct cpumask cpu_mask;
struct kobject kobj;
+ u32 msix_intr;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
dd, SDMA_MAP_SINGLE, tx, addr, len);
}
-struct iowait;
+struct iowait_work;
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct list_head *tx_list,
- u32 *count);
+ u16 *count_out);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 25e867393463..2be513d4c9da 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
@@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret;
}
+static DEVICE_ATTR_RO(board_id);
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
+static DEVICE_ATTR_RO(boardversion);
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
min(dd->num_user_contexts,
(u32)dd->sc_sizes[SC_USER].count));
}
+static DEVICE_ATTR_RO(nctxts);
-static ssize_t show_nfreectxts(struct device *device,
+static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
/* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
+static DEVICE_ATTR_RO(nfreectxts);
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
}
+static DEVICE_ATTR_RO(serial);
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
@@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
bail:
return ret < 0 ? ret : count;
}
+static DEVICE_ATTR_WO(chip_reset);
/*
* Convert the reported temperature from an integer (reported in
@@ -598,7 +605,7 @@ bail:
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
*/
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
}
return ret;
}
+static DEVICE_ATTR_RO(tempsense);
/*
* end of per-unit (or driver, in some cases, but replicated
@@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
*/
/* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *hfi1_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_board_id,
- &dev_attr_nctxts,
- &dev_attr_nfreectxts,
- &dev_attr_serial,
- &dev_attr_boardversion,
- &dev_attr_tempsense,
- &dev_attr_chip_reset,
+static struct attribute *hfi1_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_nctxts.attr,
+ &dev_attr_nfreectxts.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_boardversion.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_chip_reset.attr,
+ NULL,
+};
+
+const struct attribute_group ib_hfi1_attr_group = {
+ .attrs = hfi1_attributes,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
struct device *class_dev = &dev->dev;
int i, j, ret;
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
- ret = device_create_file(&dev->dev, hfi1_attributes[i]);
- if (ret)
- goto bail;
- }
-
for (i = 0; i < dd->num_sdma; i++) {
ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
&sde_ktype, &class_dev->kobj,
@@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0;
bail:
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
- device_remove_file(&dev->dev, hfi1_attributes[i]);
-
for (i = 0; i < dd->num_sdma; i++)
kobject_del(&dd->per_sdma[i].kobj);
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 8540463ef3f7..84458f1325e1 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -62,3 +62,4 @@ __print_symbolic(etype, \
#include "trace_rx.h"
#include "trace_tx.h"
#include "trace_mmu.h"
+#include "trace_iowait.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_iowait.h b/drivers/infiniband/hw/hfi1/trace_iowait.h
new file mode 100644
index 000000000000..27f4334ece2b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_iowait.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_IOWAIT_H
+
+#include <linux/tracepoint.h>
+#include "iowait.h"
+#include "verbs.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_iowait
+
+DECLARE_EVENT_CLASS(hfi1_iowait_template,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag),
+ TP_STRUCT__entry(/* entry */
+ __field(unsigned long, addr)
+ __field(unsigned long, flags)
+ __field(u32, flag)
+ __field(u32, qpn)
+ ),
+ TP_fast_assign(/* assign */
+ __entry->addr = (unsigned long)wait;
+ __entry->flags = wait->flags;
+ __entry->flag = (1 << flag);
+ __entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
+ ),
+ TP_printk(/* print */
+ "iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
+ __entry->addr,
+ __entry->qpn,
+ __entry->flags,
+ __entry->flag
+ )
+ );
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+#endif /* __HFI1_TRACE_IOWAIT_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_iowait
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index e254dcec6f64..6aca0c5a7f97 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp, wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
+ rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
@@ -426,7 +426,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -449,7 +449,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
@@ -523,7 +523,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -550,7 +550,7 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
@@ -564,7 +564,7 @@ rdma_last:
tlen -= (hdrsize + extra_bytes);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 70d39fc450a1..4baa8f4d49de 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
}
hfi1_make_grh(ibp, &grh, &grd, 0, 0);
- hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags);
ps->flags = tflags;
- hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx;
}
}
@@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
goto drop;
}
if (packet->grh) {
- hfi1_copy_sge(&qp->r_sge, packet->grh,
- sizeof(struct ib_grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, packet->grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
struct ib_grh grh;
@@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* out when creating 16B, add back the GRH here.
*/
hfi1_make_ext_grh(packet, &grh, slid, dlid);
- hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(struct ib_grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
- hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
- true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index a3a7b33196d6..3f0aadccd9f6 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
static unsigned initial_pkt_count = 8;
-static int user_sdma_send_pkts(struct user_sdma_request *req,
- unsigned maxpkts);
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
@@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent);
@@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
static int defer_packet_queue(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
- container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+ container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
@@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
pq->n_max_reqs = hfi1_sdma_comp_ring_size;
- pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
pq->mm = fd->mm;
- iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
+ iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL);
pq->reqidx = 0;
@@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
/* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
- (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
+ !atomic_read(&pq->n_reqs));
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
@@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
+/**
+ * hfi1_user_sdma_process_request() - Process and start a user sdma request
+ * @fd: valid file descriptor
+ * @iovec: array of io vectors to process
+ * @dim: overall iovec array size
+ * @count: number of io vector array entries processed
+ */
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count)
@@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
u8 opcode, sc, vl;
u16 pkey;
u32 slid;
- int req_queued = 0;
u16 dlid;
u32 selector;
@@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->data_len = 0;
req->pq = pq;
req->cq = cq;
- req->status = -1;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
@@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
- req->done = 0;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
+ /* The request is initialized, count it */
+ atomic_inc(&pq->n_reqs);
+
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->data_iovs = i;
- req->status = ret;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
- atomic_inc(&pq->n_reqs);
- req_queued = 1;
+ pq->state = SDMA_PKT_Q_ACTIVE;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY)) {
- req->status = ret;
+ if (unlikely(ret < 0 && ret != -EBUSY))
goto free_req;
- }
-
- /*
- * It is possible that the SDMA engine would have processed all the
- * submitted packets by the time we get here. Therefore, only set
- * packet queue state to ACTIVE if there are still uncompleted
- * requests.
- */
- if (atomic_read(&pq->n_reqs))
- xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
/*
* This is a somewhat blocking send implementation.
@@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- WRITE_ONCE(req->has_error, 1);
- if (READ_ONCE(req->seqcomp) ==
- req->seqsubmitted - 1)
- goto free_req;
- return ret;
- }
+ if (ret != -EBUSY)
+ goto free_req;
wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req, true);
- if (req_queued)
+ /*
+ * If the submitted seqsubmitted == npkts, the completion routine
+ * controls the final state. If sequbmitted < npkts, wait for any
+ * outstanding packets to finish before cleaning up.
+ */
+ if (req->seqsubmitted < req->info.npkts) {
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+ user_sdma_free_request(req, true);
pq_update(pq);
- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
return ret;
}
@@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
return ret;
}
-static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
- int ret = 0, count;
+ int ret = 0;
+ u16 count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -828,7 +824,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) {
if (++req->iov_idx == req->data_iovs) {
ret = -EFAULT;
- goto free_txreq;
+ goto free_tx;
}
iovec = &req->iovs[req->iov_idx];
WARN_ON(iovec->offset);
@@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
changes = set_txreq_header_ahg(req, tx,
datalen);
- if (changes < 0)
+ if (changes < 0) {
+ ret = changes;
goto free_tx;
+ }
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ ret = sdma_send_txlist(req->sde,
+ iowait_get_ib_work(&pq->busy),
+ &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return idx;
}
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
- u16 idx;
+ enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d",
status);
WRITE_ONCE(req->has_error, 1);
+ state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- idx = req->info.comp_idx;
- if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
- if (req->seqcomp == req->info.npkts - 1) {
- req->status = 0;
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, COMPLETE, 0);
- }
- } else {
- if (status != SDMA_TXREQ_S_OK)
- req->status = status;
- if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
- (READ_ONCE(req->done) ||
- READ_ONCE(req->has_error))) {
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, ERROR, req->status);
- }
- }
+
+ /* sequence isn't complete? We are done */
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+ user_sdma_free_request(req, false);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
- if (atomic_dec_and_test(&pq->n_reqs)) {
- xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
+ if (atomic_dec_and_test(&pq->n_reqs))
wake_up(&pq->wait);
- }
}
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node)
continue;
+ req->iovs[i].node = NULL;
+
if (unpin)
hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index d2bc77f75253..14dfd757dafd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
-#define SDMA_PKT_Q_INACTIVE BIT(0)
-#define SDMA_PKT_Q_ACTIVE BIT(1)
-#define SDMA_PKT_Q_DEFERRED BIT(2)
+enum pkt_q_sdma_state {
+ SDMA_PKT_Q_ACTIVE,
+ SDMA_PKT_Q_DEFERRED,
+};
/*
* Maximum retry attempts to submit a TX request
@@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
struct user_sdma_request *reqs;
unsigned long *req_in_use;
struct iowait busy;
- unsigned state;
+ enum pkt_q_sdma_state state;
wait_queue_head_t wait;
unsigned long unpinned;
struct mmu_rb_handler *handler;
@@ -203,14 +204,12 @@ struct user_sdma_request {
s8 ahg_idx;
/* Writeable fields shared with interrupt */
- u64 seqcomp ____cacheline_aligned_in_smp;
- u64 seqsubmitted;
- /* status of the last txreq completed */
- int status;
+ u16 seqcomp ____cacheline_aligned_in_smp;
+ u16 seqsubmitted;
/* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp;
- u64 seqnum;
+ u16 seqnum;
/*
* KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the
@@ -228,7 +227,6 @@ struct user_sdma_request {
u16 tididx;
/* progress index moving along the iovs array */
u8 iov_idx;
- u8 done;
u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
@@ -248,7 +246,7 @@ struct user_sdma_txreq {
struct user_sdma_request *req;
u16 flags;
unsigned int busycount;
- u64 seqnum;
+ u16 seqnum;
};
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 13374c727b14..48e11e510358 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
-#define COPY_CACHELESS 1
-#define COPY_ADAPTIVE 2
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
@@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
/* 16B trailing buffer */
static const u8 trail_buf[MAX_16B_PADDING];
-static uint wss_threshold;
+static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
-/* memory working set size */
-struct hfi1_wss {
- unsigned long *entries;
- atomic_t total_count;
- atomic_t clean_counter;
- atomic_t clean_entry;
-
- int threshold;
- int num_entries;
- long pages_mask;
-};
-
-static struct hfi1_wss wss;
-
-int hfi1_wss_init(void)
-{
- long llc_size;
- long llc_bits;
- long table_size;
- long table_bits;
-
- /* check for a valid percent range - default to 80 if none or invalid */
- if (wss_threshold < 1 || wss_threshold > 100)
- wss_threshold = 80;
- /* reject a wildly large period */
- if (wss_clean_period > 1000000)
- wss_clean_period = 256;
- /* reject a zero period */
- if (wss_clean_period == 0)
- wss_clean_period = 1;
-
- /*
- * Calculate the table size - the next power of 2 larger than the
- * LLC size. LLC size is in KiB.
- */
- llc_size = wss_llc_size() * 1024;
- table_size = roundup_pow_of_two(llc_size);
-
- /* one bit per page in rounded up table */
- llc_bits = llc_size / PAGE_SIZE;
- table_bits = table_size / PAGE_SIZE;
- wss.pages_mask = table_bits - 1;
- wss.num_entries = table_bits / BITS_PER_LONG;
-
- wss.threshold = (llc_bits * wss_threshold) / 100;
- if (wss.threshold == 0)
- wss.threshold = 1;
-
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
- GFP_KERNEL);
- if (!wss.entries) {
- hfi1_wss_exit();
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void hfi1_wss_exit(void)
-{
- /* coded to handle partially initialized and repeat callers */
- kfree(wss.entries);
- wss.entries = NULL;
-}
-
-/*
- * Advance the clean counter. When the clean period has expired,
- * clean an entry.
- *
- * This is implemented in atomics to avoid locking. Because multiple
- * variables are involved, it can be racy which can lead to slightly
- * inaccurate information. Since this is only a heuristic, this is
- * OK. Any innaccuracies will clean themselves out as the counter
- * advances. That said, it is unlikely the entry clean operation will
- * race - the next possible racer will not start until the next clean
- * period.
- *
- * The clean counter is implemented as a decrement to zero. When zero
- * is reached an entry is cleaned.
- */
-static void wss_advance_clean_counter(void)
-{
- int entry;
- int weight;
- unsigned long bits;
-
- /* become the cleaner if we decrement the counter to zero */
- if (atomic_dec_and_test(&wss.clean_counter)) {
- /*
- * Set, not add, the clean period. This avoids an issue
- * where the counter could decrement below the clean period.
- * Doing a set can result in lost decrements, slowing the
- * clean advance. Since this a heuristic, this possible
- * slowdown is OK.
- *
- * An alternative is to loop, advancing the counter by a
- * clean period until the result is > 0. However, this could
- * lead to several threads keeping another in the clean loop.
- * This could be mitigated by limiting the number of times
- * we stay in the loop.
- */
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- /*
- * Uniquely grab the entry to clean and move to next.
- * The current entry is always the lower bits of
- * wss.clean_entry. The table size, wss.num_entries,
- * is always a power-of-2.
- */
- entry = (atomic_inc_return(&wss.clean_entry) - 1)
- & (wss.num_entries - 1);
-
- /* clear the entry and count the bits */
- bits = xchg(&wss.entries[entry], 0);
- weight = hweight64((u64)bits);
- /* only adjust the contended total count if needed */
- if (weight)
- atomic_sub(weight, &wss.total_count);
- }
-}
-
-/*
- * Insert the given address into the working set array.
- */
-static void wss_insert(void *address)
-{
- u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
- u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
- u32 nr = page & (BITS_PER_LONG - 1);
-
- if (!test_and_set_bit(nr, &wss.entries[entry]))
- atomic_inc(&wss.total_count);
-
- wss_advance_clean_counter();
-}
-
-/*
- * Is the working set larger than the threshold?
- */
-static inline bool wss_exceeds_threshold(void)
-{
- return atomic_read(&wss.total_count) >= wss.threshold;
-}
-
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
@@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
*/
__be64 ib_hfi1_sys_image_guid;
-/**
- * hfi1_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- * @release: boolean to release MR
- * @copy_last: do a separate copy of the last 8 bytes
- */
-void hfi1_copy_sge(
- struct rvt_sge_state *ss,
- void *data, u32 length,
- bool release,
- bool copy_last)
-{
- struct rvt_sge *sge = &ss->sge;
- int i;
- bool in_last = false;
- bool cacheless_copy = false;
-
- if (sge_copy_mode == COPY_CACHELESS) {
- cacheless_copy = length >= PAGE_SIZE;
- } else if (sge_copy_mode == COPY_ADAPTIVE) {
- if (length >= PAGE_SIZE) {
- /*
- * NOTE: this *assumes*:
- * o The first vaddr is the dest.
- * o If multiple pages, then vaddr is sequential.
- */
- wss_insert(sge->vaddr);
- if (length >= (2 * PAGE_SIZE))
- wss_insert(sge->vaddr + PAGE_SIZE);
-
- cacheless_copy = wss_exceeds_threshold();
- } else {
- wss_advance_clean_counter();
- }
- }
- if (copy_last) {
- if (length > 8) {
- length -= 8;
- } else {
- copy_last = false;
- in_last = true;
- }
- }
-
-again:
- while (length) {
- u32 len = rvt_get_sge_length(sge, length);
-
- WARN_ON_ONCE(len == 0);
- if (unlikely(in_last)) {
- /* enforce byte transfer ordering */
- for (i = 0; i < len; i++)
- ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
- } else if (cacheless_copy) {
- cacheless_memcpy(sge->vaddr, data, len);
- } else {
- memcpy(sge->vaddr, data, len);
- }
- rvt_update_sge(ss, len, release);
- data += len;
- length -= len;
- }
-
- if (copy_last) {
- copy_last = false;
- in_last = true;
- length = 8;
- goto again;
- }
-}
-
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
@@ -713,7 +492,7 @@ static void verbs_sdma_complete(
spin_lock(&qp->s_lock);
if (tx->wqe) {
- hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct hfi1_opa_header *hdr;
@@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
+ &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
@@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(ret))
goto bail_build;
}
- ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
- ps->pkts_sent);
+ ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
@@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
+ &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibdev *dev = &dd->verbs_dev;
int was_empty;
@@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
hfi1_sc_wantpiobuf_intr(sc, 1);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
pio_bail:
if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, wc_status);
+ rvt_send_complete(qp, qp->s_wqe, wc_status);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hfi1_cdbg(PIO, "%s() Failed. Completing with err",
__func__);
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
return -EINVAL;
@@ -1582,6 +1360,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
struct hfi1_pportdata *ppd;
struct hfi1_devdata *dd;
u8 sc5;
+ u8 sl;
if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) &&
!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
@@ -1590,8 +1369,13 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
/* test the mapping for validity */
ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
dd = dd_from_ppd(ppd);
+
+ sl = rdma_ah_get_sl(ah_attr);
+ if (sl >= ARRAY_SIZE(ibp->sl_to_sc))
+ return -EINVAL;
+
+ sc5 = ibp->sl_to_sc[sl];
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
return -EINVAL;
return 0;
@@ -1937,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
hfi1_comp_vect_mappings_lookup;
@@ -1950,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+ dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
+ dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
+ dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
+ /* opcode translation table */
+ dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
+
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi,
@@ -1961,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
+ rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
+ &ib_hfi1_attr_group);
+
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
if (ret)
goto err_verbs_txreq;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index a4d06502f06d..64c9054db5f3 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -166,11 +166,13 @@ struct hfi1_qp_priv {
* This structure is used to hold commonly lookedup and computed values during
* the send engine progress.
*/
+struct iowait_work;
struct hfi1_pkt_state {
struct hfi1_ibdev *dev;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
+ struct iowait_work *wait;
unsigned long flags;
unsigned long timeout;
unsigned long timeout_int;
@@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
return container_of(rdi, struct hfi1_ibdev, rdi);
}
-static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
+static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
struct hfi1_qp_priv *priv;
@@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- bool release, bool copy_last);
-
void hfi1_cnp_rcv(struct hfi1_packet *packet);
void hfi1_uc_rcv(struct hfi1_packet *packet);
@@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
@@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status);
-
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
-int hfi1_wss_init(void);
-void hfi1_wss_exit(void);
-
-/* platform specific: return the lowest level cache (llc) size, in KiB */
-static inline int wss_llc_size(void)
-{
- /* assume that the boot CPU value is universal for all CPUs */
- return boot_cpu_data.x86_cache_size;
-}
-
-/* platform specific: cacheless copy */
-static inline void cacheless_memcpy(void *dst, void *src, size_t n)
-{
- /*
- * Use the only available X64 cacheless copy. Add a __user cast
- * to quiet sparse. The src agument is already in the kernel so
- * there are no security issues. The extra fault recovery machinery
- * is not invoked.
- */
- __copy_user_nocache(dst, (void __user *)src, n, 0);
-}
-
static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
{
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1c19bbc764b2..2a77af26a231 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
return &tx->txreq;
}
-static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
{
struct sdma_txreq *stx;
- struct hfi1_qp_priv *priv = qp->priv;
- stx = iowait_get_txhead(&priv->s_iowait);
+ stx = iowait_get_txhead(w);
if (stx)
return container_of(stx, struct verbs_txreq, txreq);
return NULL;
}
-static inline bool verbs_txreq_queued(struct rvt_qp *qp)
+static inline bool verbs_txreq_queued(struct iowait_work *w)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- return iowait_packet_queued(&priv->s_iowait);
+ return iowait_packet_queued(w);
}
void hfi1_put_txreq(struct verbs_txreq *tx);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c643d80c5a53..c9876d9e3cb9 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
uctxt->seq_cnt = 1;
uctxt->is_vnic = true;
- hfi1_set_vnic_msix_info(uctxt);
+ msix_request_rcd_irq(uctxt);
hfi1_stats.sps_ctxts++;
dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
flush_wc();
- hfi1_reset_vnic_msix_info(uctxt);
-
/*
* Disable receive context and interrupt available, reset all
* RcvCtxtCtrl bits to default values.
@@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+ /* msix_intr will always be > 0, only clean up if this is true */
+ if (uctxt->msix_intr)
+ msix_free_irq(dd, uctxt->msix_intr);
+
uctxt->event_flags = 0;
hfi1_clear_tids(uctxt);
@@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
/* ensure irqs see the change */
- hfi1_vnic_synchronize_irq(dd);
+ msix_vnic_synchronize_irq(dd);
/* remove unread skbs */
for (i = 0; i < vinfo->num_rx_q; i++) {
@@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_txreq_init(dd);
if (rc)
goto txreq_fail;
-
- dd->vnic.msix_idx = dd->first_dyn_msix_idx;
}
for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index c3c96c5869ed..97bd940a056a 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
goto free_desc;
tx->retry_count = 0;
- ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
- vnic_sdma->pkts_sent);
+ ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
+ &tx->txreq, vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc;
@@ -230,13 +230,13 @@ tx_err:
* become available.
*/
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_vnic_sdma *vnic_sdma =
- container_of(wait, struct hfi1_vnic_sdma, wait);
+ container_of(wait->iow, struct hfi1_vnic_sdma, wait);
struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
@@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
- iowait_queue(pkts_sent, wait, &sde->dmawait);
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
}
@@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
for (i = 0; i < vinfo->num_tx_q; i++) {
struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
- iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
+ hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd;
@@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
/* Add a free descriptor watermark for wakeups */
if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ struct iowait_work *work;
+
INIT_LIST_HEAD(&vnic_sdma->stx.list);
vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
- list_add_tail(&vnic_sdma->stx.list,
- &vnic_sdma->wait.tx_head);
+ work = iowait_get_ib_work(&vnic_sdma->wait);
+ list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
}
}
}
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index fddb5fdf92de..21c2100b2ea9 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_HNS
tristate "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON
+ depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on ARM64 || (COMPILE_TEST && 64BIT)
---help---
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 0d96c5bb38cd..9990dc9eb96a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -49,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ bool vlan_en = false;
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
@@ -58,8 +59,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
gid_attr = ah_attr->grh.sgid_attr;
- if (is_vlan_dev(gid_attr->ndev))
+ if (is_vlan_dev(gid_attr->ndev)) {
vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
+ vlan_en = true;
+ }
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@@ -71,6 +74,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
HNS_ROCE_PORT_NUM_SHIFT));
ah->av.gid_index = grh->sgid_index;
ah->av.vlan = cpu_to_le16(vlan_tag);
+ ah->av.vlan_en = vlan_en;
dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
ah->av.vlan);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 9a24fd0ee3e7..d39bdfdb5de9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -88,8 +88,11 @@
#define BITMAP_RR 1
#define MR_TYPE_MR 0x00
+#define MR_TYPE_FRMR 0x01
#define MR_TYPE_DMA 0x03
+#define HNS_ROCE_FRMR_MAX_PA 512
+
#define PKEY_ID 0xffff
#define GUID_LEN 8
#define NODE_DESC_SIZE 64
@@ -193,6 +196,9 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_MW = BIT(7),
+ HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
+ HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
};
enum hns_roce_mtt_type {
@@ -219,19 +225,11 @@ struct hns_roce_uar {
unsigned long logic_idx;
};
-struct hns_roce_vma_data {
- struct list_head list;
- struct vm_area_struct *vma;
- struct mutex *vma_list_mutex;
-};
-
struct hns_roce_ucontext {
struct ib_ucontext ibucontext;
struct hns_roce_uar uar;
struct list_head page_list;
struct mutex page_mutex;
- struct list_head vma_list;
- struct mutex vma_list_mutex;
};
struct hns_roce_pd {
@@ -293,6 +291,16 @@ struct hns_roce_mtt {
enum hns_roce_mtt_type mtt_type;
};
+struct hns_roce_mw {
+ struct ib_mw ibmw;
+ u32 pdn;
+ u32 rkey;
+ int enabled; /* MW's active status */
+ u32 pbl_hop_num;
+ u32 pbl_ba_pg_sz;
+ u32 pbl_buf_pg_sz;
+};
+
/* Only support 4K page size for mr register */
#define MR_SIZE_4K 0
@@ -304,6 +312,7 @@ struct hns_roce_mr {
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
u32 access;/* Access permission of MR */
+ u32 npages;
int enabled; /* MR's active status */
int type; /* MR's register type */
u64 *pbl_buf;/* MR's PBL space */
@@ -457,6 +466,7 @@ struct hns_roce_av {
u8 dgid[HNS_ROCE_GID_SIZE];
u8 mac[6];
__le16 vlan;
+ bool vlan_en;
};
struct hns_roce_ah {
@@ -656,6 +666,7 @@ struct hns_roce_eq_table {
};
struct hns_roce_caps {
+ u64 fw_ver;
u8 num_ports;
int gid_table_len[HNS_ROCE_MAX_PORTS];
int pkey_table_len[HNS_ROCE_MAX_PORTS];
@@ -665,7 +676,9 @@ struct hns_roce_caps {
u32 max_sq_sg; /* 2 */
u32 max_sq_inline; /* 32 */
u32 max_rq_sg; /* 2 */
+ u32 max_extend_sg;
int num_qps; /* 256k */
+ int reserved_qps;
u32 max_wqes; /* 16k */
u32 max_sq_desc_sz; /* 64 */
u32 max_rq_desc_sz; /* 64 */
@@ -738,6 +751,7 @@ struct hns_roce_work {
struct hns_roce_dev *hr_dev;
struct work_struct work;
u32 qpn;
+ u32 cqn;
int event_type;
int sub_type;
};
@@ -764,6 +778,8 @@ struct hns_roce_hw {
struct hns_roce_mr *mr, int flags, u32 pdn,
int mr_access_flags, u64 iova, u64 size,
void *mb_buf);
+ int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
+ int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
dma_addr_t dma_handle, int nent, u32 vector);
@@ -863,6 +879,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
return container_of(ibmr, struct hns_roce_mr, ibmr);
}
+static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct hns_roce_mw, ibmw);
+}
+
static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct hns_roce_qp, ibqp);
@@ -968,12 +989,20 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr);
int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox,
unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
+struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
+ struct ib_udata *udata);
+int hns_roce_dealloc_mw(struct ib_mw *ibmw);
+
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
struct hns_roce_buf *buf);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 081aa91fc162..ca05810c92dc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -731,7 +731,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
cq_init_attr.comp_vector = 0;
cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
if (IS_ERR(cq)) {
- dev_err(dev, "Create cq for reseved loop qp failed!");
+ dev_err(dev, "Create cq for reserved loop qp failed!");
return -ENOMEM;
}
free_mr->mr_free_cq = to_hr_cq(cq);
@@ -744,7 +744,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
if (IS_ERR(pd)) {
- dev_err(dev, "Create pd for reseved loop qp failed!");
+ dev_err(dev, "Create pd for reserved loop qp failed!");
ret = -ENOMEM;
goto alloc_pd_failed;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0218c0f8c2a7..a4c62ae23a9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -54,6 +54,59 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length);
}
+static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ struct hns_roce_wqe_frmr_seg *fseg,
+ const struct ib_reg_wr *wr)
+{
+ struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+
+ /* use ib_access_flags */
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
+ wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
+ wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_RR_S,
+ wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_RW_S,
+ wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_LW_S,
+ wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+
+ /* Data structure reuse may lead to confusion */
+ rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
+ rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
+
+ rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
+ rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
+ rc_sq_wqe->rkey = cpu_to_le32(wr->key);
+ rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
+
+ fseg->pbl_size = cpu_to_le32(mr->pbl_size);
+ roce_set_field(fseg->mode_buf_pg_sz,
+ V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
+ V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
+ mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+ roce_set_bit(fseg->mode_buf_pg_sz,
+ V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
+}
+
+static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
+ const struct ib_atomic_wr *wr)
+{
+ if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
+ aseg->cmp_data = cpu_to_le64(wr->compare_add);
+ } else {
+ aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
+ aseg->cmp_data = 0;
+ }
+}
+
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind)
{
@@ -121,6 +174,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
if (wr->opcode == IB_WR_RDMA_READ) {
+ *bad_wr = wr;
dev_err(hr_dev->dev, "Not support inline data!\n");
return -EINVAL;
}
@@ -179,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ struct hns_roce_wqe_frmr_seg *fseg;
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db;
struct ib_qp_attr attr;
@@ -191,6 +246,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
int attr_mask;
u32 tmp_len;
int ret = 0;
+ u32 hr_op;
u8 *smac;
int nreq;
int i;
@@ -356,6 +412,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
V2_UD_SEND_WQE_BYTE_40_PORTN_S,
qp->port);
+ roce_set_bit(ud_sq_wqe->byte_40,
+ V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
+ ah->av.vlan_en ? 1 : 0);
roce_set_field(ud_sq_wqe->byte_48,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
@@ -406,99 +465,100 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
+ wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
switch (wr->opcode) {
case IB_WR_RDMA_READ:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_READ);
+ hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_RDMA_WRITE:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
+ hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
+ hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_SEND:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND);
+ hr_op = HNS_ROCE_V2_WQE_OP_SEND;
break;
case IB_WR_SEND_WITH_INV:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
+ hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
break;
case IB_WR_SEND_WITH_IMM:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
+ hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
break;
case IB_WR_LOCAL_INV:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_LOCAL_INV);
+ hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+ rc_sq_wqe->inv_key =
+ cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+ case IB_WR_REG_MR:
+ hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
+ fseg = wqe;
+ set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
+ hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
+ rc_sq_wqe->rkey =
+ cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va =
+ cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_ATOMIC_FETCH_AND_ADD:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
+ hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
+ rc_sq_wqe->rkey =
+ cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va =
+ cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
+ hr_op =
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
break;
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
+ hr_op =
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
break;
default:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_MASK);
+ hr_op = HNS_ROCE_V2_WQE_OP_MASK;
break;
}
- wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ struct hns_roce_v2_wqe_data_seg *dseg;
+
+ dseg = wqe;
+ set_data_seg_v2(dseg, wr->sg_list);
+ wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
+ set_atomic_seg(wqe, atomic_wr(wr));
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+ wr->num_sge);
+ } else if (wr->opcode != IB_WR_REG_MR) {
+ ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
+ wqe, &sge_ind, bad_wr);
+ if (ret)
+ goto out;
+ }
- ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
- &sge_ind, bad_wr);
- if (ret)
- goto out;
ind++;
} else {
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
@@ -935,7 +995,24 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
resp = (struct hns_roce_query_version *)desc.data;
hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
- hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id);
+ hr_dev->vendor_id = hr_dev->pci_dev->vendor;
+
+ return 0;
+}
+
+static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_query_fw_info *resp;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ resp = (struct hns_roce_query_fw_info *)desc.data;
+ hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
return 0;
}
@@ -1158,6 +1235,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
ret = hns_roce_cmq_query_hw_info(hr_dev);
if (ret) {
+ dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_fw_ver(hr_dev);
+ if (ret) {
dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
ret);
return ret;
@@ -1185,14 +1269,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
- hr_dev->vendor_part_id = 0;
- hr_dev->sys_image_guid = 0;
+
+ hr_dev->vendor_part_id = hr_dev->pci_dev->device;
+ hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
+ caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
@@ -1222,6 +1308,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1;
caps->reserved_uars = 0;
caps->reserved_cqs = 0;
+ caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
caps->qpc_ba_pg_sz = 0;
caps->qpc_buf_pg_sz = 0;
@@ -1255,6 +1342,11 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
HNS_ROCE_CAP_FLAG_RQ_INLINE |
HNS_ROCE_CAP_FLAG_RECORD_DB |
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
+
+ if (hr_dev->pci_dev->revision == 0x21)
+ caps->flags |= HNS_ROCE_CAP_FLAG_MW |
+ HNS_ROCE_CAP_FLAG_FRMR;
+
caps->pkey_table_len[0] = 1;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1262,6 +1354,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
+ if (hr_dev->pci_dev->revision == 0x21)
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
+
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1690,10 +1785,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
(mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
+ mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
(mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
@@ -1817,6 +1913,88 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
return 0;
}
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+ V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+ V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+ mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+ V2_MPT_BYTE_4_PD_S, mr->pd);
+
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
+
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
+ roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
+ V2_MPT_BYTE_48_PBL_BA_H_S,
+ upper_32_bits(mr->pbl_ba >> 3));
+
+ roce_set_field(mpt_entry->byte_64_buf_pa1,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+ mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ return 0;
+}
+
+static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+ V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+ V2_MPT_BYTE_4_PD_S, mw->pdn);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st,
+ V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+ V2_MPT_BYTE_4_PBL_HOP_NUM_S,
+ mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
+ 0 : mw->pbl_hop_num);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+ mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
+ mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
+
+ roce_set_field(mpt_entry->byte_64_buf_pa1,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+ mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ mpt_entry->lkey = cpu_to_le32(mw->rkey);
+
+ return 0;
+}
+
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
@@ -2274,6 +2452,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->src_qp = (u8)roce_get_field(cqe->byte_32,
V2_CQE_BYTE_32_RMT_QPN_M,
V2_CQE_BYTE_32_RMT_QPN_S);
+ wc->slid = 0;
wc->wc_flags |= (roce_get_bit(cqe->byte_32,
V2_CQE_BYTE_32_GRH_S) ?
IB_WC_GRH : 0);
@@ -2287,7 +2466,14 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->smac[5] = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_SMAC_5_M,
V2_CQE_BYTE_28_SMAC_5_S);
- wc->vlan_id = 0xffff;
+ if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
+ wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
+ V2_CQE_BYTE_28_VID_M,
+ V2_CQE_BYTE_28_VID_S);
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
wc->network_hdr_type = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_PORT_TYPE_M,
@@ -2589,21 +2775,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
- roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M,
- V2_QPC_BYTE_60_MAPID_S, 0);
+ roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
+ V2_QPC_BYTE_60_TEMPID_S, 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid,
- V2_QPC_BYTE_60_INNER_MAP_IND_S, 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S,
- 0);
+ roce_set_field(qpc_mask->byte_60_qpst_tempid,
+ V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
+ 0);
+ roce_set_bit(qpc_mask->byte_60_qpst_tempid,
+ V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
+ roce_set_bit(qpc_mask->byte_60_qpst_tempid,
+ V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
@@ -2685,7 +2866,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
- roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0);
+ roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
+ 0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
@@ -2694,8 +2876,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_144_raq,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
- roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S,
- 0);
roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
@@ -2721,14 +2901,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
- roce_set_field(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
-
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
@@ -2746,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
0);
+ roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
+ roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
+
roce_set_field(qpc_mask->byte_176_msg_pktn,
V2_QPC_BYTE_176_MSG_USE_PKTN_M,
V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@@ -2790,6 +2971,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
+ roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
+ 0);
+ roce_set_bit(qpc_mask->byte_232_irrl_sge,
+ V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
+ roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
+ 0);
+
qpc_mask->irrl_cur_sge_offset = 0;
roce_set_field(qpc_mask->byte_240_irrl_tail,
@@ -2955,13 +3143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_56_dqpn_err,
V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
}
- roce_set_field(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
}
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
@@ -3271,13 +3452,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- roce_set_field(context->byte_60_qpst_mapid,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt);
- roce_set_field(qpc_mask->byte_60_qpst_mapid,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0);
-
context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
@@ -3538,6 +3712,17 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
}
+ if (is_vlan_dev(gid_attr->ndev)) {
+ roce_set_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
+ roce_set_bit(context->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
+ }
+
roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_VLAN_ID_M,
V2_QPC_BYTE_24_VLAN_ID_S, vlan);
@@ -3584,8 +3769,15 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+ if (hr_dev->pci_dev->revision == 0x21 &&
+ gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ roce_set_field(context->byte_24_mtu_tc,
+ V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
+ grh->traffic_class >> 2);
+ else
+ roce_set_field(context->byte_24_mtu_tc,
+ V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
+ grh->traffic_class);
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0);
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -3606,9 +3798,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
/* Every status migrate must change state */
- roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
+ roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state);
- roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
+ roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */
@@ -3728,7 +3920,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
goto out;
}
- state = roce_get_field(context->byte_60_qpst_mapid,
+ state = roce_get_field(context->byte_60_qpst_tempid,
V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
@@ -3995,13 +4187,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
{
struct hns_roce_work *irq_work =
container_of(work, struct hns_roce_work, work);
+ struct device *dev = irq_work->hr_dev->dev;
u32 qpn = irq_work->qpn;
+ u32 cqn = irq_work->cqn;
switch (irq_work->event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ dev_info(dev, "Path migrated succeeded.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ dev_warn(dev, "Path migration failed.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ dev_info(dev, "Communication established.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ dev_warn(dev, "Send queue drained.\n");
+ break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ dev_err(dev, "Local work queue catastrophic error.\n");
+ hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ switch (irq_work->sub_type) {
+ case HNS_ROCE_LWQCE_QPC_ERROR:
+ dev_err(dev, "QP %d, QPC error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_MTU_ERROR:
+ dev_err(dev, "QP %d, MTU error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
+ dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
+ dev_err(dev, "QP %d, WQE addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
+ dev_err(dev, "QP %d, WQE shift error.\n", qpn);
+ break;
+ default:
+ dev_err(dev, "Unhandled sub_event type %d.\n",
+ irq_work->sub_type);
+ break;
+ }
+ break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ dev_err(dev, "Invalid request local work queue error.\n");
+ hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ dev_err(dev, "Local access violation work queue error.\n");
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ switch (irq_work->sub_type) {
+ case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
+ dev_err(dev, "QP %d, R_key violation.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_LENGTH_ERROR:
+ dev_err(dev, "QP %d, length error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_VA_ERROR:
+ dev_err(dev, "QP %d, VA error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_PD_ERROR:
+ dev_err(dev, "QP %d, PD error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
+ dev_err(dev, "QP %d, rw acc error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
+ dev_err(dev, "QP %d, key state error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
+ dev_err(dev, "QP %d, MR operation error.\n", qpn);
+ break;
+ default:
+ dev_err(dev, "Unhandled sub_event type %d.\n",
+ irq_work->sub_type);
+ break;
+ }
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ dev_warn(dev, "SRQ limit reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ dev_warn(dev, "SRQ last wqe reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ dev_err(dev, "SRQ catas error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ dev_err(dev, "CQ 0x%x access err.\n", cqn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ dev_warn(dev, "DB overflow.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_FLR:
+ dev_warn(dev, "Function level reset.\n");
break;
default:
break;
@@ -4011,7 +4293,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
}
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq, u32 qpn)
+ struct hns_roce_eq *eq,
+ u32 qpn, u32 cqn)
{
struct hns_roce_work *irq_work;
@@ -4022,6 +4305,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
irq_work->hr_dev = hr_dev;
irq_work->qpn = qpn;
+ irq_work->cqn = cqn;
irq_work->event_type = eq->event_type;
irq_work->sub_type = eq->sub_type;
queue_work(hr_dev->irq_workq, &(irq_work->work));
@@ -4058,124 +4342,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
hns_roce_write64_k(doorbell, eq->doorbell);
}
-static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- u32 qpn)
-{
- struct device *dev = hr_dev->dev;
- int sub_type;
-
- dev_warn(dev, "Local work queue catastrophic error.\n");
- sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
- HNS_ROCE_V2_AEQE_SUB_TYPE_S);
- switch (sub_type) {
- case HNS_ROCE_LWQCE_QPC_ERROR:
- dev_warn(dev, "QP %d, QPC error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_MTU_ERROR:
- dev_warn(dev, "QP %d, MTU error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
- dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
- break;
- default:
- dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
- break;
- }
-}
-
-static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe, u32 qpn)
-{
- struct device *dev = hr_dev->dev;
- int sub_type;
-
- dev_warn(dev, "Local access violation work queue error.\n");
- sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
- HNS_ROCE_V2_AEQE_SUB_TYPE_S);
- switch (sub_type) {
- case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
- dev_warn(dev, "QP %d, R_key violation.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_LENGTH_ERROR:
- dev_warn(dev, "QP %d, length error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_VA_ERROR:
- dev_warn(dev, "QP %d, VA error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_PD_ERROR:
- dev_err(dev, "QP %d, PD error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
- dev_warn(dev, "QP %d, rw acc error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
- dev_warn(dev, "QP %d, key state error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
- dev_warn(dev, "QP %d, MR operation error.\n", qpn);
- break;
- default:
- dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
- break;
- }
-}
-
-static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type, u32 qpn)
-{
- struct device *dev = hr_dev->dev;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_warn(dev, "Communication established.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- dev_warn(dev, "Send queue drained.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
- break;
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- dev_warn(dev, "Invalid request local work queue error.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
- break;
- default:
- break;
- }
-
- hns_roce_qp_event(hr_dev, qpn, event_type);
-}
-
-static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type, u32 cqn)
-{
- struct device *dev = hr_dev->dev;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- dev_warn(dev, "CQ 0x%x access err.\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- dev_warn(dev, "CQ 0x%x overflow\n", cqn);
- break;
- default:
- break;
- }
-
- hns_roce_cq_event(hr_dev, cqn, event_type);
-}
-
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
{
u32 buf_chk_sz;
@@ -4251,31 +4417,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- dev_warn(dev, "Path migrated succeeded.\n");
- break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- dev_warn(dev, "Path migration failed.\n");
- break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
- qpn);
+ hns_roce_qp_event(hr_dev, qpn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- dev_warn(dev, "SRQ not support.\n");
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
- cqn);
+ hns_roce_cq_event(hr_dev, cqn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- dev_warn(dev, "DB overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@@ -4284,10 +4442,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- dev_warn(dev, "CEQ overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_FLR:
- dev_warn(dev, "Function level reset.\n");
break;
default:
dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
@@ -4304,7 +4460,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
dev_warn(dev, "cons_index overflow, set back to 0.\n");
eq->cons_index = 0;
}
- hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
+ hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
}
set_eq_cons_index_v2(eq);
@@ -5125,6 +5281,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
create_singlethread_workqueue("hns_roce_irq_workqueue");
if (!hr_dev->irq_workq) {
dev_err(dev, "Create irq workqueue failed!\n");
+ ret = -ENOMEM;
goto err_request_irq_fail;
}
@@ -5195,6 +5352,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.set_mac = hns_roce_v2_set_mac,
.write_mtpt = hns_roce_v2_write_mtpt,
.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
+ .frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
+ .mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
.write_cqc = hns_roce_v2_write_cqc,
.set_hem = hns_roce_v2_set_hem,
.clear_hem = hns_roce_v2_clear_hem,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 14aa308befef..8bc820635bbd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -50,6 +50,7 @@
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
+#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1
@@ -78,6 +79,7 @@
#define HNS_ROCE_INVALID_LKEY 0x100
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
+#define HNS_ROCE_V2_RSV_QPS 8
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
@@ -201,6 +203,7 @@ enum {
/* CMQ command */
enum hns_roce_opcode_type {
+ HNS_QUERY_FW_VER = 0x0001,
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
@@ -324,6 +327,7 @@ struct hns_roce_v2_cq_context {
enum{
V2_MPT_ST_VALID = 0x1,
+ V2_MPT_ST_FREE = 0x2,
};
enum hns_roce_v2_qp_state {
@@ -350,7 +354,7 @@ struct hns_roce_v2_qp_context {
__le32 dmac;
__le32 byte_52_udpspn_dmac;
__le32 byte_56_dqpn_err;
- __le32 byte_60_qpst_mapid;
+ __le32 byte_60_qpst_tempid;
__le32 qkey_xrcd;
__le32 byte_68_rq_db;
__le32 rq_db_record_addr;
@@ -492,26 +496,15 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28
#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
-#define V2_QPC_BYTE_60_MAPID_S 0
-#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0)
+#define V2_QPC_BYTE_60_TEMPID_S 0
+#define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
-#define V2_QPC_BYTE_60_INNER_MAP_IND_S 13
+#define V2_QPC_BYTE_60_SCC_TOKEN_S 8
+#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
-#define V2_QPC_BYTE_60_SQ_MAP_IND_S 14
+#define V2_QPC_BYTE_60_SQ_DB_DOING_S 27
-#define V2_QPC_BYTE_60_RQ_MAP_IND_S 15
-
-#define V2_QPC_BYTE_60_TEMPID_S 16
-#define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16)
-
-#define V2_QPC_BYTE_60_EXT_MAP_IND_S 23
-
-#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24
-#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24)
-
-#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27
-
-#define V2_QPC_BYTE_60_SQ_EXT_IND_S 28
+#define V2_QPC_BYTE_60_RQ_DB_DOING_S 28
#define V2_QPC_BYTE_60_QP_ST_S 29
#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
@@ -534,6 +527,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_76_RQIE_S 28
+#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
#define V2_QPC_BYTE_80_RX_CQN_S 0
#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
@@ -588,7 +582,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_140_RR_MAX_S 12
#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
-#define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15
+#define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
@@ -599,8 +593,6 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
-#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24
-
#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
@@ -637,9 +629,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22
#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
-#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24
-#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24)
-
+#define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
+#define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
+#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
+#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
@@ -725,6 +718,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
+#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
+#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
+#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
+
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
@@ -743,6 +740,9 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_244_RNR_CNT_S 27
#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
+#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
+#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
+
#define V2_QPC_BYTE_248_IRRL_PSN_S 0
#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
@@ -818,6 +818,11 @@ struct hns_roce_v2_cqe {
#define V2_CQE_BYTE_28_PORT_TYPE_S 16
#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
+#define V2_CQE_BYTE_28_VID_S 18
+#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
+
+#define V2_CQE_BYTE_28_VID_VLD_S 30
+
#define V2_CQE_BYTE_32_RMT_QPN_S 0
#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
@@ -878,8 +883,19 @@ struct hns_roce_v2_mpt_entry {
#define V2_MPT_BYTE_8_LW_EN_S 7
+#define V2_MPT_BYTE_8_MW_CNT_S 8
+#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
+
+#define V2_MPT_BYTE_12_FRE_S 0
+
#define V2_MPT_BYTE_12_PA_S 1
+#define V2_MPT_BYTE_12_MR_MW_S 4
+
+#define V2_MPT_BYTE_12_BPD_S 5
+
+#define V2_MPT_BYTE_12_BQP_S 6
+
#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
@@ -988,6 +1004,8 @@ struct hns_roce_v2_ud_send_wqe {
#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
+#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
+
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
#define V2_UD_SEND_WQE_DMAC_0_S 0
@@ -1042,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
+#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
+
+#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
+
+#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
+
+#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
+
+#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
@@ -1051,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+struct hns_roce_wqe_frmr_seg {
+ __le32 pbl_size;
+ __le32 mode_buf_pg_sz;
+};
+
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
+
+#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
+
struct hns_roce_v2_wqe_data_seg {
__le32 len;
__le32 lkey;
@@ -1068,6 +1106,11 @@ struct hns_roce_query_version {
__le32 rsv[5];
};
+struct hns_roce_query_fw_info {
+ __le32 fw_ver;
+ __le32 rsv[5];
+};
+
struct hns_roce_cfg_llm_a {
__le32 base_addr_l;
__le32 base_addr_h;
@@ -1564,4 +1607,9 @@ struct hns_roce_eq_context {
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
+struct hns_roce_wqe_atomic_seg {
+ __le64 fetchadd_swap_data;
+ __le64 cmp_data;
+};
+
#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c5cae9a38c04..1b3ee514f2ef 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -196,6 +196,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
memset(props, 0, sizeof(*props));
+ props->fw_ver = hr_dev->caps.fw_ver;
props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
props->max_mr_size = (u64)(~(0ULL));
props->page_size_cap = hr_dev->caps.page_size_cap;
@@ -215,7 +216,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_pd = hr_dev->caps.num_pds;
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
- props->atomic_cap = IB_ATOMIC_NONE;
+ props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
@@ -344,8 +346,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret)
goto error_fail_uar_alloc;
- INIT_LIST_HEAD(&context->vma_list);
- mutex_init(&context->vma_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
INIT_LIST_HEAD(&context->page_list);
mutex_init(&context->page_mutex);
@@ -376,76 +376,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
-static void hns_roce_vma_open(struct vm_area_struct *vma)
-{
- vma->vm_ops = NULL;
-}
-
-static void hns_roce_vma_close(struct vm_area_struct *vma)
-{
- struct hns_roce_vma_data *vma_data;
-
- vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
- vma_data->vma = NULL;
- mutex_lock(vma_data->vma_list_mutex);
- list_del(&vma_data->list);
- mutex_unlock(vma_data->vma_list_mutex);
- kfree(vma_data);
-}
-
-static const struct vm_operations_struct hns_roce_vm_ops = {
- .open = hns_roce_vma_open,
- .close = hns_roce_vma_close,
-};
-
-static int hns_roce_set_vma_data(struct vm_area_struct *vma,
- struct hns_roce_ucontext *context)
-{
- struct list_head *vma_head = &context->vma_list;
- struct hns_roce_vma_data *vma_data;
-
- vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
- if (!vma_data)
- return -ENOMEM;
-
- vma_data->vma = vma;
- vma_data->vma_list_mutex = &context->vma_list_mutex;
- vma->vm_private_data = vma_data;
- vma->vm_ops = &hns_roce_vm_ops;
-
- mutex_lock(&context->vma_list_mutex);
- list_add(&vma_data->list, vma_head);
- mutex_unlock(&context->vma_list_mutex);
-
- return 0;
-}
-
static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
- if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
- return -EINVAL;
+ switch (vma->vm_pgoff) {
+ case 0:
+ return rdma_user_mmap_io(context, vma,
+ to_hr_ucontext(context)->uar.pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot));
+
+ /* vm_pgoff: 1 -- TPTR */
+ case 1:
+ if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
+ return -EINVAL;
+ /*
+ * FIXME: using io_remap_pfn_range on the dma address returned
+ * by dma_alloc_coherent is totally wrong.
+ */
+ return rdma_user_mmap_io(context, vma,
+ hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+ hr_dev->tptr_size,
+ vma->vm_page_prot);
- if (vma->vm_pgoff == 0) {
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_hr_ucontext(context)->uar.pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
- } else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr &&
- hr_dev->tptr_size) {
- /* vm_pgoff: 1 -- TPTR */
- if (io_remap_pfn_range(vma, vma->vm_start,
- hr_dev->tptr_dma_addr >> PAGE_SHIFT,
- hr_dev->tptr_size,
- vma->vm_page_prot))
- return -EAGAIN;
- } else
+ default:
return -EINVAL;
-
- return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
+ }
}
static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@@ -471,21 +429,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
- struct hns_roce_vma_data *vma_data, *n;
- struct vm_area_struct *vma;
-
- mutex_lock(&context->vma_list_mutex);
- list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
- vma = vma_data->vma;
- zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
-
- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
- vma->vm_ops = NULL;
- list_del(&vma_data->list);
- kfree(vma_data);
- }
- mutex_unlock(&context->vma_list_mutex);
}
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@@ -508,7 +451,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
spin_lock_init(&iboe->lock);
ib_dev = &hr_dev->ib_dev;
- strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
@@ -584,12 +526,27 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
}
+ /* MW */
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
+ ib_dev->alloc_mw = hns_roce_alloc_mw;
+ ib_dev->dealloc_mw = hns_roce_dealloc_mw;
+ ib_dev->uverbs_cmd_mask |=
+ (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
+ (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
+ }
+
+ /* FRMR */
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+ ib_dev->alloc_mr = hns_roce_alloc_mr;
+ ib_dev->map_mr_sg = hns_roce_map_mr_sg;
+ }
+
/* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable;
ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
ib_dev->driver_id = RDMA_DRIVER_HNS;
- ret = ib_register_device(ib_dev, NULL);
+ ret = ib_register_device(ib_dev, "hns_%d", NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index eb26a5f6fc58..521ad2aa3a4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
u64 bt_idx;
u64 size;
- mhop_num = hr_dev->caps.pbl_hop_num;
+ mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
@@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_dma_addr;
- mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+ mr->pbl_hop_num = mhop_num;
mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
return 0;
@@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->key = hw_index_to_key(index); /* MR key */
if (size == ~0ull) {
- mr->type = MR_TYPE_DMA;
mr->pbl_buf = NULL;
mr->pbl_dma_addr = 0;
/* PBL multi-hop addressing parameters */
@@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->pbl_l1_dma_addr = NULL;
mr->pbl_l0_dma_addr = 0;
} else {
- mr->type = MR_TYPE_MR;
if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
&(mr->pbl_dma_addr),
@@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
u32 mhop_num;
u64 bt_idx;
- npages = ib_umem_page_count(mr->umem);
+ npages = mr->pbl_size;
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- mhop_num = hr_dev->caps.pbl_hop_num;
+ mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
if (mhop_num == HNS_ROCE_HOP_NUM_0)
return;
@@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
}
if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
+ if (mr->type == MR_TYPE_MR)
+ npages = ib_umem_page_count(mr->umem);
if (!hr_dev->caps.pbl_hop_num)
dma_free_coherent(dev, (unsigned int)(npages * 8),
@@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
goto err_table;
}
- ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+ if (mr->type != MR_TYPE_FRMR)
+ ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+ else
+ ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
if (ret) {
dev_err(dev, "Write mtpt fail!\n");
goto err_page;
@@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
if (mr == NULL)
return ERR_PTR(-ENOMEM);
+ mr->type = MR_TYPE_DMA;
+
/* Allocate memory region key */
ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
~0ULL, acc, 0, mr);
@@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
+ mr->type = MR_TYPE_MR;
+
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
access_flags, n, mr);
if (ret)
@@ -1201,3 +1207,193 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
return ret;
}
+
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_mr *mr;
+ u64 length;
+ u32 page_size;
+ int ret;
+
+ page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
+ length = max_num_sg * page_size;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+ dev_err(dev, "max_num_sg larger than %d\n",
+ HNS_ROCE_FRMR_MAX_PA);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->type = MR_TYPE_FRMR;
+
+ /* Allocate memory region key */
+ ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
+ 0, max_num_sg, mr);
+ if (ret)
+ goto err_free;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_mr:
+ hns_roce_mr_free(to_hr_dev(pd->device), mr);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+ mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+
+ return 0;
+}
+
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+ mr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+}
+
+static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mw *mw)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ if (mw->enabled) {
+ ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
+ & (hr_dev->caps.num_mtpts - 1));
+ if (ret)
+ dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
+
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
+ key_to_hw_index(mw->rkey));
+ }
+
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+ key_to_hw_index(mw->rkey), BITMAP_NO_RR);
+}
+
+static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mw *mw)
+{
+ struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
+ int ret;
+
+ /* prepare HEM entry memory */
+ ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+ if (ret)
+ return ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto err_table;
+ }
+
+ ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
+ if (ret) {
+ dev_err(dev, "MW write mtpt fail!\n");
+ goto err_page;
+ }
+
+ ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ if (ret) {
+ dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
+ goto err_page;
+ }
+
+ mw->enabled = 1;
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+
+err_page:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+err_table:
+ hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+
+ return ret;
+}
+
+struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
+ struct hns_roce_mw *mw;
+ unsigned long index = 0;
+ int ret;
+
+ mw = kmalloc(sizeof(*mw), GFP_KERNEL);
+ if (!mw)
+ return ERR_PTR(-ENOMEM);
+
+ /* Allocate a key for mw from bitmap */
+ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
+ if (ret)
+ goto err_bitmap;
+
+ mw->rkey = hw_index_to_key(index);
+
+ mw->ibmw.rkey = mw->rkey;
+ mw->ibmw.type = type;
+ mw->pdn = to_hr_pd(ib_pd)->pdn;
+ mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+ mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+ mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+
+ ret = hns_roce_mw_enable(hr_dev, mw);
+ if (ret)
+ goto err_mw;
+
+ return &mw->ibmw;
+
+err_mw:
+ hns_roce_mw_free(hr_dev, mw);
+
+err_bitmap:
+ kfree(mw);
+
+ return ERR_PTR(ret);
+}
+
+int hns_roce_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
+
+ hns_roce_mw_free(hr_dev, mw);
+ kfree(mw);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index efb7e961ca65..5ebf481a39d9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/pci.h>
#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
@@ -343,6 +344,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
{
u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
u8 max_sq_stride = ilog2(roundup_sq_stride);
+ u32 ex_sge_num;
u32 page_size;
u32 max_cnt;
@@ -372,7 +374,18 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
if (hr_qp->sq.max_gs > 2)
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
(hr_qp->sq.max_gs - 2));
+
+ if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
+ if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+ dev_err(hr_dev->dev,
+ "The extended sge cnt error! sge_cnt=%d\n",
+ hr_qp->sge.sge_cnt);
+ return -EINVAL;
+ }
+ }
+
hr_qp->sge.sge_shift = 4;
+ ex_sge_num = hr_qp->sge.sge_cnt;
/* Get buf size, SQ and RQ are aligned to page_szie */
if (hr_dev->caps.max_sq_sg <= 2) {
@@ -386,6 +399,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), PAGE_SIZE);
} else {
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
+ hr_qp->sge.sge_cnt =
+ max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), page_size) +
HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
@@ -394,7 +409,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), page_size);
hr_qp->sq.offset = 0;
- if (hr_qp->sge.sge_cnt) {
+ if (ex_sge_num) {
hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
(hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift),
@@ -465,6 +480,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sge.sge_shift = 4;
}
+ if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
+ if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+ dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
+ hr_qp->sge.sge_cnt);
+ return -EINVAL;
+ }
+ }
+
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sq.offset = 0;
@@ -472,6 +495,8 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
page_size);
if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
+ hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
+ (u32)hr_qp->sge.sge_cnt);
hr_qp->sge.offset = size;
size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
hr_qp->sge.sge_shift, page_size);
@@ -952,8 +977,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
}
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask)) {
dev_err(dev, "ib_modify_qp_is_ok failed\n");
goto out;
}
@@ -1106,14 +1131,20 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
int reserved_from_top = 0;
+ int reserved_from_bot;
int ret;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
- /* A port include two SQP, six port total 12 */
+ /* In hw v1, a port include two SQP, six ports total 12 */
+ if (hr_dev->caps.max_sq_sg <= 2)
+ reserved_from_bot = SQP_NUM;
+ else
+ reserved_from_bot = hr_dev->caps.reserved_qps;
+
ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
- hr_dev->caps.num_qps - 1, SQP_NUM,
+ hr_dev->caps.num_qps - 1, reserved_from_bot,
reserved_from_top);
if (ret) {
dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 423818a7d333..771eb6bd0785 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
unsigned long flags;
rtnl_lock();
- for_each_netdev_rcu(&init_net, ip_dev) {
+ for_each_netdev(&init_net, ip_dev) {
if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
(rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
(ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index e2e6c74a7452..102875872bea 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2135,10 +2135,10 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
}
/**
- * i40iw_show_rev
+ * hw_rev_show
*/
-static ssize_t i40iw_show_rev(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct i40iw_ib_device *iwibdev = container_of(dev,
struct i40iw_ib_device,
@@ -2147,34 +2147,37 @@ static ssize_t i40iw_show_rev(struct device *dev,
return sprintf(buf, "%x\n", hw_rev);
}
+static DEVICE_ATTR_RO(hw_rev);
/**
- * i40iw_show_hca
+ * hca_type_show
*/
-static ssize_t i40iw_show_hca(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "I40IW\n");
}
+static DEVICE_ATTR_RO(hca_type);
/**
- * i40iw_show_board
+ * board_id_show
*/
-static ssize_t i40iw_show_board(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
+static struct attribute *i40iw_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *i40iw_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group i40iw_attr_group = {
+ .attrs = i40iw_dev_attributes,
};
/**
@@ -2752,7 +2755,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
i40iw_pr_err("iwdev == NULL\n");
return NULL;
}
- strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
iwibdev->ibdev.owner = THIS_MODULE;
iwdev->iwibdev = iwibdev;
iwibdev->iwdev = iwdev;
@@ -2851,20 +2853,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
}
/**
- * i40iw_unregister_rdma_device - unregister of iwarp from IB
- * @iwibdev: rdma device ptr
- */
-static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
- device_remove_file(&iwibdev->ibdev.dev,
- i40iw_dev_attributes[i]);
- ib_unregister_device(&iwibdev->ibdev);
-}
-
-/**
* i40iw_destroy_rdma_device - destroy rdma device and free resources
* @iwibdev: IB device ptr
*/
@@ -2873,7 +2861,7 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
if (!iwibdev)
return;
- i40iw_unregister_rdma_device(iwibdev);
+ ib_unregister_device(&iwibdev->ibdev);
kfree(iwibdev->ibdev.iwcm);
iwibdev->ibdev.iwcm = NULL;
wait_event_timeout(iwibdev->iwdev->close_wq,
@@ -2888,32 +2876,19 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
*/
int i40iw_register_rdma_device(struct i40iw_device *iwdev)
{
- int i, ret;
+ int ret;
struct i40iw_ib_device *iwibdev;
iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
if (!iwdev->iwibdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
-
+ rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
- ret = ib_register_device(&iwibdev->ibdev, NULL);
+ ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
if (ret)
goto error;
- for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
- ret =
- device_create_file(&iwibdev->ibdev.dev,
- i40iw_dev_attributes[i]);
- if (ret) {
- while (i > 0) {
- i--;
- device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
- }
- ib_unregister_device(&iwibdev->ibdev);
- goto error;
- }
- }
return 0;
error:
kfree(iwdev->iwibdev->ibdev.iwcm);
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index db4aa13ebae0..d1de3285fd88 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI && INET
+ depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on MAY_USE_DEVLINK
select NET_VENDOR_MELLANOX
select MLX4_CORE
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index e5466d786bb1..8942f5f7f04d 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -807,15 +807,17 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int err;
struct ib_port_attr pattr;
- if (in_wc && in_wc->qp->qp_num) {
- pr_debug("received MAD: slid:%d sqpn:%d "
- "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
- in_wc->slid, in_wc->src_qp,
- in_wc->dlid_path_bits,
- in_wc->qp->qp_num,
- in_wc->wc_flags,
- in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
- be16_to_cpu(in_mad->mad_hdr.attr_id));
+ if (in_wc && in_wc->qp) {
+ pr_debug("received MAD: port:%d slid:%d sqpn:%d "
+ "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
+ port_num,
+ in_wc->slid, in_wc->src_qp,
+ in_wc->dlid_path_bits,
+ in_wc->qp->qp_num,
+ in_wc->wc_flags,
+ be64_to_cpu(in_mad->mad_hdr.tid),
+ in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+ be16_to_cpu(in_mad->mad_hdr.attr_id));
if (in_wc->wc_flags & IB_WC_GRH) {
pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
be64_to_cpu(in_grh->sgid.global.subnet_prefix),
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ca0f1ee26091..0def2323459c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -517,9 +517,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->page_size_cap = dev->dev->caps.page_size_cap;
props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
- props->max_send_sge = dev->dev->caps.max_sq_sg;
- props->max_recv_sge = dev->dev->caps.max_rq_sg;
- props->max_sge_rd = MLX4_MAX_SGE_RD;
+ props->max_send_sge =
+ min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
+ props->max_recv_sge =
+ min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
+ props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
@@ -1138,144 +1140,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
-static void mlx4_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA.
- * This is done through either mremap flow or split_vma (usually due
- * to mlock, madvise, munmap, etc.). We do not support a clone of the
- * vma, as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original vma size is exactly a single page that there will be no
- * "splitting" operations on.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx4_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
- * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting. The exiting case is handled explicitly as part
- * of mlx4_ib_disassociate_ucontext.
- */
- mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
- area->vm_private_data;
-
- /* set the vma context pointer to null in the mlx4_ib driver's private
- * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
- */
- mlx4_ib_vma_priv_data->vma = NULL;
-}
-
-static const struct vm_operations_struct mlx4_ib_vm_ops = {
- .open = mlx4_ib_vma_open,
- .close = mlx4_ib_vma_close
-};
-
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- int i;
- struct vm_area_struct *vma;
- struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
-
- /* need to protect from a race on closing the vma as part of
- * mlx4_ib_vma_close().
- */
- for (i = 0; i < HW_BAR_COUNT; i++) {
- vma = context->hw_bar_info[i].vma;
- if (!vma)
- continue;
-
- zap_vma_ptes(context->hw_bar_info[i].vma,
- context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
-
- context->hw_bar_info[i].vma->vm_flags &=
- ~(VM_SHARED | VM_MAYSHARE);
- /* context going to be destroyed, should not access ops any more */
- context->hw_bar_info[i].vma->vm_ops = NULL;
- }
-}
-
-static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx4_ib_vma_private_data *vma_private_data)
-{
- vma_private_data->vma = vma;
- vma->vm_private_data = vma_private_data;
- vma->vm_ops = &mlx4_ib_vm_ops;
}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
- struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
-
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
-
- if (vma->vm_pgoff == 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_DB].vma)
- return -EINVAL;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
+ switch (vma->vm_pgoff) {
+ case 0:
+ return rdma_user_mmap_io(context, vma,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot));
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
-
- } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_BF].vma)
+ case 1:
+ if (dev->dev->caps.bf_reg_size == 0)
return -EINVAL;
+ return rdma_user_mmap_io(
+ context, vma,
+ to_mucontext(context)->uar.pfn +
+ dev->dev->caps.num_uars,
+ PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn +
- dev->dev->caps.num_uars,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
-
- } else if (vma->vm_pgoff == 3) {
+ case 3: {
struct mlx4_clock_params params;
int ret;
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
- return -EINVAL;
-
ret = mlx4_get_internal_clock_params(dev->dev, &params);
-
if (ret)
return ret;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- (pci_resource_start(dev->dev->persist->pdev,
- params.bar) +
- params.offset)
- >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma,
- &mucontext->hw_bar_info[HW_BAR_CLOCK]);
- } else {
- return -EINVAL;
+ return rdma_user_mmap_io(
+ context, vma,
+ (pci_resource_start(dev->dev->persist->pdev,
+ params.bar) +
+ params.offset) >>
+ PAGE_SHIFT,
+ PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
}
- return 0;
+ default:
+ return -EINVAL;
+ }
}
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
@@ -2131,39 +2039,43 @@ out:
return err;
}
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->dev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
dev->dev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *mlx4_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *mlx4_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group mlx4_attr_group = {
+ .attrs = mlx4_class_attributes,
};
struct diag_counter {
@@ -2634,7 +2546,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->dev = dev;
ibdev->bond_next_port = 0;
- strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
@@ -2896,8 +2807,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
+ rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
- if (ib_register_device(&ibdev->ib_dev, NULL))
+ if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
goto err_diag_counters;
if (mlx4_ib_mad_init(ibdev))
@@ -2920,12 +2832,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
- for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
- if (device_create_file(&ibdev->ib_dev.dev,
- mlx4_class_attributes[j]))
- goto err_notif;
- }
-
ibdev->ib_active = true;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 81ffc007e0a1..d844831179cf 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
if (!list_empty(&group->pending_list))
req = list_first_entry(&group->pending_list,
struct mcast_req, group_list);
- if ((method == IB_MGMT_METHOD_GET_RESP)) {
+ if (method == IB_MGMT_METHOD_GET_RESP) {
if (req) {
send_reply_to_slave(req->func, group, &req->sa_mad, status);
--group->func[req->func].num_pend_reqs;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e10dccc7958f..8850dfc3826d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -80,16 +80,11 @@ enum hw_bar_type {
HW_BAR_COUNT
};
-struct mlx4_ib_vma_private_data {
- struct vm_area_struct *vma;
-};
-
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
struct mutex db_page_mutex;
- struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
struct list_head wqn_ranges_list;
struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
};
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 6dd3cd2c2f80..0711ca1dfb8f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2629,7 +2629,6 @@ enum {
static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
@@ -2639,13 +2638,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (cur_state != new_state || cur_state != IB_QPS_RESET) {
- int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- ll = rdma_port_get_link_layer(&dev->ib_dev, port);
- }
-
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask, ll)) {
+ attr_mask)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index e219093d2764..752bdd536130 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -818,9 +818,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
if (!mlx4_is_master(dev->dev))
return 0;
- dev->iov_parent =
- kobject_create_and_add("iov",
- kobject_get(dev->ib_dev.ports_parent->parent));
+ dev->iov_parent = kobject_create_and_add("iov", &dev->ib_dev.dev.kobj);
if (!dev->iov_parent) {
ret = -ENOMEM;
goto err;
@@ -850,7 +848,6 @@ err_add_entries:
err_ports:
kobject_put(dev->iov_parent);
err:
- kobject_put(dev->ib_dev.ports_parent->parent);
pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
return ret;
}
@@ -886,5 +883,4 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
kobject_put(device->ports_parent);
kobject_put(device->iov_parent);
kobject_put(device->iov_parent);
- kobject_put(device->ib_dev.ports_parent->parent);
}
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index c84fef9a8a08..ca060a2e2b36 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -197,3 +197,132 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
0, 0);
}
+
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {};
+
+ MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, in, tirn, tirn);
+ MLX5_SET(destroy_tir_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+ MLX5_SET(destroy_tis_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {};
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+ MLX5_SET(destroy_rqt_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *tdn = MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+
+ return err;
+}
+
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+
+ MLX5_SET(dealloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
+
+ MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, in, pd, pdn);
+ MLX5_SET(dealloc_pd_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+ MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+ MLX5_SET(attach_to_mcg_in, in, uid, uid);
+ gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+ MLX5_SET(detach_from_mcg_in, in, uid, uid);
+ gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+ MLX5_SET(alloc_xrcd_in, in, uid, uid);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+ return err;
+}
+
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
+
+ MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+ MLX5_SET(dealloc_xrcd_in, in, uid, uid);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 88cbb1c41703..c03c56455534 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -47,4 +47,18 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
u64 length, u32 alignment);
int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
+void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
+void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+ u16 uid);
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+ u16 uid);
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid);
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid);
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 088205d7f1a1..dae30b6478bf 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -877,6 +877,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
}
+ MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid);
return 0;
err_cqb:
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 25dafa4ff6ca..61aab7c0c513 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -19,7 +19,7 @@
#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
struct devx_obj {
struct mlx5_core_dev *mdev;
- u32 obj_id;
+ u64 obj_id;
u32 dinlen; /* destroy inbox length */
u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
};
@@ -45,13 +45,14 @@ static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
return to_mucontext(ib_uverbs_get_ucontext(file));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
u64 general_obj_types;
void *hdr;
int err;
+ u16 uid;
hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
@@ -60,9 +61,6 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
@@ -70,19 +68,18 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
if (err)
return err;
- context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
- return 0;
+ uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return uid;
}
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context)
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid);
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
@@ -109,150 +106,218 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
}
}
+/*
+ * As the obj_id in the firmware is not globally unique the object type
+ * must be considered upon checking for a valid object id.
+ * For that the opcode of the creator command is encoded as part of the obj_id.
+ */
+static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
+{
+ return ((u64)opcode << 32) | obj_id;
+}
+
static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
- u32 obj_id;
+ u64 obj_id;
switch (opcode) {
case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
- obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT,
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_id));
break;
case MLX5_CMD_OP_QUERY_MKEY:
- obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
+ MLX5_GET(query_mkey_in, in,
+ mkey_index));
break;
case MLX5_CMD_OP_QUERY_CQ:
- obj_id = MLX5_GET(query_cq_in, in, cqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(query_cq_in, in, cqn));
break;
case MLX5_CMD_OP_MODIFY_CQ:
- obj_id = MLX5_GET(modify_cq_in, in, cqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(modify_cq_in, in, cqn));
break;
case MLX5_CMD_OP_QUERY_SQ:
- obj_id = MLX5_GET(query_sq_in, in, sqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(query_sq_in, in, sqn));
break;
case MLX5_CMD_OP_MODIFY_SQ:
- obj_id = MLX5_GET(modify_sq_in, in, sqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(modify_sq_in, in, sqn));
break;
case MLX5_CMD_OP_QUERY_RQ:
- obj_id = MLX5_GET(query_rq_in, in, rqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(query_rq_in, in, rqn));
break;
case MLX5_CMD_OP_MODIFY_RQ:
- obj_id = MLX5_GET(modify_rq_in, in, rqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(modify_rq_in, in, rqn));
break;
case MLX5_CMD_OP_QUERY_RMP:
- obj_id = MLX5_GET(query_rmp_in, in, rmpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(query_rmp_in, in, rmpn));
break;
case MLX5_CMD_OP_MODIFY_RMP:
- obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(modify_rmp_in, in, rmpn));
break;
case MLX5_CMD_OP_QUERY_RQT:
- obj_id = MLX5_GET(query_rqt_in, in, rqtn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(query_rqt_in, in, rqtn));
break;
case MLX5_CMD_OP_MODIFY_RQT:
- obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(modify_rqt_in, in, rqtn));
break;
case MLX5_CMD_OP_QUERY_TIR:
- obj_id = MLX5_GET(query_tir_in, in, tirn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(query_tir_in, in, tirn));
break;
case MLX5_CMD_OP_MODIFY_TIR:
- obj_id = MLX5_GET(modify_tir_in, in, tirn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(modify_tir_in, in, tirn));
break;
case MLX5_CMD_OP_QUERY_TIS:
- obj_id = MLX5_GET(query_tis_in, in, tisn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(query_tis_in, in, tisn));
break;
case MLX5_CMD_OP_MODIFY_TIS:
- obj_id = MLX5_GET(modify_tis_in, in, tisn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(modify_tis_in, in, tisn));
break;
case MLX5_CMD_OP_QUERY_FLOW_TABLE:
- obj_id = MLX5_GET(query_flow_table_in, in, table_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(query_flow_table_in, in,
+ table_id));
break;
case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
- obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(modify_flow_table_in, in,
+ table_id));
break;
case MLX5_CMD_OP_QUERY_FLOW_GROUP:
- obj_id = MLX5_GET(query_flow_group_in, in, group_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
+ MLX5_GET(query_flow_group_in, in,
+ group_id));
break;
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
- obj_id = MLX5_GET(query_fte_in, in, flow_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(query_fte_in, in,
+ flow_index));
break;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
- obj_id = MLX5_GET(set_fte_in, in, flow_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(set_fte_in, in, flow_index));
break;
case MLX5_CMD_OP_QUERY_Q_COUNTER:
- obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
+ MLX5_GET(query_q_counter_in, in,
+ counter_set_id));
break;
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
- obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
+ MLX5_GET(query_flow_counter_in, in,
+ flow_counter_id));
break;
case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
- obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_id));
break;
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
- obj_id = MLX5_GET(query_scheduling_element_in, in,
- scheduling_element_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(query_scheduling_element_in,
+ in, scheduling_element_id));
break;
case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
- obj_id = MLX5_GET(modify_scheduling_element_in, in,
- scheduling_element_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(modify_scheduling_element_in,
+ in, scheduling_element_id));
break;
case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
- obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
+ MLX5_GET(add_vxlan_udp_dport_in, in,
+ vxlan_udp_port));
break;
case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
- obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(query_l2_table_entry_in, in,
+ table_index));
break;
case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
- obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(set_l2_table_entry_in, in,
+ table_index));
break;
case MLX5_CMD_OP_QUERY_QP:
- obj_id = MLX5_GET(query_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(query_qp_in, in, qpn));
break;
case MLX5_CMD_OP_RST2INIT_QP:
- obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rst2init_qp_in, in, qpn));
break;
case MLX5_CMD_OP_INIT2RTR_QP:
- obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(init2rtr_qp_in, in, qpn));
break;
case MLX5_CMD_OP_RTR2RTS_QP:
- obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rtr2rts_qp_in, in, qpn));
break;
case MLX5_CMD_OP_RTS2RTS_QP:
- obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rts2rts_qp_in, in, qpn));
break;
case MLX5_CMD_OP_SQERR2RTS_QP:
- obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(sqerr2rts_qp_in, in, qpn));
break;
case MLX5_CMD_OP_2ERR_QP:
- obj_id = MLX5_GET(qp_2err_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2err_in, in, qpn));
break;
case MLX5_CMD_OP_2RST_QP:
- obj_id = MLX5_GET(qp_2rst_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2rst_in, in, qpn));
break;
case MLX5_CMD_OP_QUERY_DCT:
- obj_id = MLX5_GET(query_dct_in, in, dctn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(query_dct_in, in, dctn));
break;
case MLX5_CMD_OP_QUERY_XRQ:
- obj_id = MLX5_GET(query_xrq_in, in, xrqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(query_xrq_in, in, xrqn));
break;
case MLX5_CMD_OP_QUERY_XRC_SRQ:
- obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(query_xrc_srq_in, in,
+ xrc_srqn));
break;
case MLX5_CMD_OP_ARM_XRC_SRQ:
- obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
break;
case MLX5_CMD_OP_QUERY_SRQ:
- obj_id = MLX5_GET(query_srq_in, in, srqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
+ MLX5_GET(query_srq_in, in, srqn));
break;
case MLX5_CMD_OP_ARM_RQ:
- obj_id = MLX5_GET(arm_rq_in, in, srq_number);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(arm_rq_in, in, srq_number));
break;
case MLX5_CMD_OP_DRAIN_DCT:
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
- obj_id = MLX5_GET(drain_dct_in, in, dctn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(drain_dct_in, in, dctn));
break;
case MLX5_CMD_OP_ARM_XRQ:
- obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(arm_xrq_in, in, xrqn));
break;
default:
return false;
@@ -264,11 +329,102 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
return false;
}
-static bool devx_is_obj_create_cmd(const void *in)
+static void devx_set_umem_valid(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
switch (opcode) {
+ case MLX5_CMD_OP_CREATE_MKEY:
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ break;
+ case MLX5_CMD_OP_CREATE_CQ:
+ {
+ void *cqc;
+
+ MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
+ break;
+ }
+ case MLX5_CMD_OP_CREATE_QP:
+ {
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
+ MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_RQ:
+ {
+ void *rqc, *wq;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_SQ:
+ {
+ void *sqc, *wq;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_MODIFY_CQ:
+ MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
+ break;
+
+ case MLX5_CMD_OP_CREATE_RMP:
+ {
+ void *rmpc, *wq;
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRQ:
+ {
+ void *xrqc, *wq;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ {
+ void *xrc_srqc;
+
+ MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
+{
+ *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (*opcode) {
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
case MLX5_CMD_OP_CREATE_MKEY:
case MLX5_CMD_OP_CREATE_CQ:
@@ -385,12 +541,49 @@ static bool devx_is_obj_query_cmd(const void *in)
}
}
+static bool devx_is_whitelist_cmd(void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
+{
+ if (devx_is_whitelist_cmd(cmd_in)) {
+ struct mlx5_ib_dev *dev;
+
+ if (c->devx_uid)
+ return c->devx_uid;
+
+ dev = to_mdev(c->ibucontext.device);
+ if (dev->devx_whitelist_uid)
+ return dev->devx_whitelist_uid;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ return c->devx_uid;
+}
static bool devx_is_general_cmd(void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
switch (opcode) {
case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_VPORT_STATE:
case MLX5_CMD_OP_QUERY_ADAPTER:
case MLX5_CMD_OP_QUERY_ISSI:
@@ -498,14 +691,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
void *cmd_out;
int err;
+ int uid;
c = devx_ufile2uctx(file);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
/* Only white list of some general HCA commands are allowed for this method. */
if (!devx_is_general_cmd(cmd_in))
@@ -515,7 +710,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
if (IS_ERR(cmd_out))
return PTR_ERR(cmd_out);
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
err = mlx5_cmd_exec(dev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
cmd_out, cmd_out_len);
@@ -723,13 +918,18 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
struct devx_obj *obj;
int err;
+ int uid;
+ u32 obj_id;
+ u16 opcode;
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
- if (!devx_is_obj_create_cmd(cmd_in))
+ if (!devx_is_obj_create_cmd(cmd_in, &opcode))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -740,7 +940,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
if (!obj)
return -ENOMEM;
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ devx_set_umem_valid(cmd_in);
+
err = mlx5_cmd_exec(dev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
cmd_out, cmd_out_len);
@@ -749,15 +951,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
uobj->object = obj;
obj->mdev = dev->mdev;
- devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id);
+ devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
+ &obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
- goto obj_free;
+ goto obj_destroy;
+ obj->obj_id = get_enc_obj_id(opcode, obj_id);
return 0;
+obj_destroy:
+ mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
obj_free:
kfree(obj);
return err;
@@ -775,9 +981,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
struct devx_obj *obj = uobj->object;
void *cmd_out;
int err;
+ int uid;
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
if (!devx_is_obj_modify_cmd(cmd_in))
return -EINVAL;
@@ -789,7 +997,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
if (IS_ERR(cmd_out))
return PTR_ERR(cmd_out);
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ devx_set_umem_valid(cmd_in);
+
err = mlx5_cmd_exec(obj->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
cmd_out, cmd_out_len);
@@ -812,9 +1022,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
struct devx_obj *obj = uobj->object;
void *cmd_out;
int err;
+ int uid;
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
if (!devx_is_obj_query_cmd(cmd_in))
return -EINVAL;
@@ -826,7 +1038,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
if (IS_ERR(cmd_out))
return PTR_ERR(cmd_out);
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
err = mlx5_cmd_exec(obj->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
cmd_out, cmd_out_len);
@@ -925,6 +1137,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
int err;
if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!capable(CAP_NET_RAW))
return -EPERM;
obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 1a29f47f836e..e57435cb6d96 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -7,7 +7,9 @@
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
@@ -16,6 +18,24 @@
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+ enum mlx5_flow_namespace_type *namespace)
+{
+ switch (table_type) {
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
[MLX5_IB_FLOW_TYPE_NORMAL] = {
.type = UVERBS_ATTR_TYPE_PTR_IN,
@@ -38,11 +58,15 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
},
};
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
+ struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
struct mlx5_ib_flow_matcher *fs_matcher;
+ struct ib_uobject **arr_flow_actions;
+ struct ib_uflow_resources *uflow_res;
void *devx_obj;
int dest_id, dest_type;
void *cmd_in;
@@ -52,6 +76,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ int len, ret, i;
if (!capable(CAP_NET_RAW))
return -EPERM;
@@ -61,7 +86,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
dest_qp = uverbs_attr_is_valid(attrs,
MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
- if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))
+ fs_matcher = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS &&
+ ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
+ return -EINVAL;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
+ (dest_devx || dest_qp))
return -EINVAL;
if (dest_devx) {
@@ -75,7 +107,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
*/
if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
return -EINVAL;
- } else {
+ } else if (dest_qp) {
struct mlx5_ib_qp *mqp;
qp = uverbs_attr_get_obj(attrs,
@@ -92,6 +124,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
else
dest_id = mqp->raw_packet_qp.rq.tirn;
dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ } else {
+ dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
if (dev->rep)
@@ -101,16 +135,48 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
inlen = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
- fs_matcher = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen,
+
+ uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+ if (!uflow_res)
+ return -ENOMEM;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+ for (i = 0; i < len; i++) {
+ struct mlx5_ib_flow_action *maction =
+ to_mflow_act(arr_flow_actions[i]->object);
+
+ ret = parse_flow_flow_action(maction, false, &flow_act);
+ if (ret)
+ goto err_out;
+ flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+ arr_flow_actions[i]->object);
+ }
+
+ ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG);
+ if (!ret) {
+ if (flow_act.flow_tag >= BIT(24)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ flow_act.has_flow_tag = true;
+ }
+
+ flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ cmd_in, inlen,
dest_id, dest_type);
- if (IS_ERR(flow_handler))
- return PTR_ERR(flow_handler);
+ if (IS_ERR(flow_handler)) {
+ ret = PTR_ERR(flow_handler);
+ goto err_out;
+ }
- ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev);
+ ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
return 0;
+err_out:
+ ib_uverbs_flow_resources_free(uflow_res);
+ return ret;
}
static int flow_matcher_cleanup(struct ib_uobject *uobject,
@@ -134,12 +200,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
struct mlx5_ib_flow_matcher *obj;
+ u32 flags;
int err;
obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
if (!obj)
return -ENOMEM;
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
obj->mask_len = uverbs_attr_get_len(
attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
err = uverbs_copy_from(&obj->matcher_mask,
@@ -165,6 +233,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
if (err)
goto end;
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ IB_FLOW_ATTR_FLAGS_EGRESS);
+ if (err)
+ goto end;
+
+ if (flags) {
+ err = mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type);
+ if (err)
+ goto end;
+ }
+
uobj->object = obj;
obj->mdev = dev->mdev;
atomic_set(&obj->usecnt, 0);
@@ -175,6 +256,248 @@ end:
return err;
}
+void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ switch (maction->flow_action_raw.sub_type) {
+ case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+ mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.action_id);
+ break;
+ case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+ mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.action_id);
+ break;
+ case MLX5_IB_FLOW_ACTION_DECAP:
+ break;
+ default:
+ break;
+ }
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_uapi_flow_table_type ft_type,
+ u8 num_actions, void *in)
+{
+ enum mlx5_flow_namespace_type namespace;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return ERR_PTR(-ENOMEM);
+
+ ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
+ &maction->flow_action_raw.action_id);
+
+ if (ret) {
+ kfree(maction);
+ return ERR_PTR(ret);
+ }
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+ maction->flow_action_raw.dev = dev;
+
+ return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct ib_flow_action *action;
+ size_t num_actions;
+ void *in;
+ int len;
+ int ret;
+
+ if (!mlx5_ib_modify_header_supported(mdev))
+ return -EOPNOTSUPP;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+ if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto))
+ return -EINVAL;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+ if (ret)
+ return ret;
+
+ num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto),
+ action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, uobj->context->device,
+ IB_FLOW_ACTION_UNSPECIFIED);
+
+ return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+ u8 packet_reformat_type,
+ u8 ft_type)
+{
+ switch (packet_reformat_type) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+ encap_general_header);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+ reformat_l2_to_l3_tunnel);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+ reformat_l3_tunnel_to_l2);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+ switch (dv_prt) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+ struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_action *maction,
+ u8 ft_type, u8 dv_prt,
+ void *in, size_t len)
+{
+ enum mlx5_flow_namespace_type namespace;
+ u8 prm_prt;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+ if (ret)
+ return ret;
+
+ ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+ in, namespace,
+ &maction->flow_action_raw.action_id);
+ if (ret)
+ return ret;
+
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+ maction->flow_action_raw.dev = dev;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&dv_prt, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+ if (ret)
+ return ret;
+
+ if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+ return -EOPNOTSUPP;
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return -ENOMEM;
+
+ if (dv_prt ==
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_DECAP;
+ maction->flow_action_raw.dev = mdev;
+ } else {
+ void *in;
+ int len;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+ if (IS_ERR(in)) {
+ ret = PTR_ERR(in);
+ goto free_maction;
+ }
+
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+ ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+ maction, ft_type, dv_prt, in, len);
+ if (ret)
+ goto free_maction;
+ }
+
+ uverbs_flow_action_fill_action(&maction->ib_action, uobj,
+ uobj->context->device,
+ IB_FLOW_ACTION_UNSPECIFIED);
+ return 0;
+
+free_maction:
+ kfree(maction);
+ return ret;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_CREATE_FLOW,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
@@ -195,7 +518,15 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ACCESS_READ),
UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ));
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_READ, 1,
+ MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_DESTROY_FLOW,
@@ -210,6 +541,44 @@ ADD_UVERBS_METHODS(mlx5_ib_fs,
&UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+ set_action_in_add_action_in_auto)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ UVERBS_ATTR_MIN_SIZE(1),
+ UA_ALLOC_AND_COPY,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+ mlx5_ib_flow_actions,
+ UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
MLX5_IB_OBJECT_FLOW_MATCHER,
@@ -224,7 +593,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
UVERBS_ATTR_TYPE(u8),
- UA_MANDATORY));
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ enum ib_flow_flags,
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -247,6 +619,7 @@ int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
root[i++] = &flow_objects;
root[i++] = &mlx5_ib_fs;
+ root[i++] = &mlx5_ib_flow_actions;
return i;
}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 35a0e04c38f2..584ff2ea7810 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -39,9 +39,6 @@ static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
};
static int
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c414f3809e5c..be701d40289e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1571,14 +1571,57 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
}
-static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ int err = 0;
+
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td++;
+ if (qp)
+ dev->lb.qps++;
+
+ if (dev->lb.user_td == 2 ||
+ dev->lb.qps == 1) {
+ if (!dev->lb.enabled) {
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+ dev->lb.enabled = true;
+ }
+ }
+
+ mutex_unlock(&dev->lb.mutex);
+
+ return err;
+}
+
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td--;
+ if (qp)
+ dev->lb.qps--;
+
+ if (dev->lb.user_td == 1 &&
+ dev->lb.qps == 0) {
+ if (dev->lb.enabled) {
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+ dev->lb.enabled = false;
+ }
+ }
+
+ mutex_unlock(&dev->lb.mutex);
+}
+
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
+ u16 uid)
{
int err;
if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
return 0;
- err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
if (err)
return err;
@@ -1587,35 +1630,23 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
return err;
- mutex_lock(&dev->lb_mutex);
- dev->user_td++;
-
- if (dev->user_td == 2)
- err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
-
- mutex_unlock(&dev->lb_mutex);
- return err;
+ return mlx5_ib_enable_lb(dev, true, false);
}
-static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
+ u16 uid)
{
if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
return;
- mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+ mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
(!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
return;
- mutex_lock(&dev->lb_mutex);
- dev->user_td--;
-
- if (dev->user_td < 2)
- mlx5_nic_vport_update_local_lb(dev->mdev, false);
-
- mutex_unlock(&dev->lb_mutex);
+ mlx5_ib_disable_lb(dev, true, false);
}
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
@@ -1727,30 +1758,24 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
- err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
- if (err)
- goto out_uars;
-
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- /* Block DEVX on Infiniband as of SELinux */
- if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
- err = -EPERM;
- goto out_td;
- }
-
- err = mlx5_ib_devx_create(dev, context);
- if (err)
- goto out_td;
+ err = mlx5_ib_devx_create(dev);
+ if (err < 0)
+ goto out_uars;
+ context->devx_uid = err;
}
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+
if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
if (err)
goto out_mdev;
}
- INIT_LIST_HEAD(&context->vma_private_list);
- mutex_init(&context->vma_private_list_mutex);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1826,13 +1851,21 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
+ if (mlx5_lag_is_active(dev->mdev)) {
+ u8 port = mlx5_core_native_port_num(dev->mdev);
+
+ atomic_set(&context->tx_port_affinity,
+ atomic_add_return(
+ 1, &dev->roce[port].tx_port_affinity));
+ }
+
return &context->ibucontext;
out_mdev:
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+out_devx:
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
- mlx5_ib_devx_destroy(dev, context);
-out_td:
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
out_uars:
deallocate_uars(dev, context);
@@ -1855,11 +1888,18 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
- if (context->devx_uid)
- mlx5_ib_devx_destroy(dev, context);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /* All umem's must be destroyed before destroying the ucontext. */
+ mutex_lock(&ibcontext->per_mm_list_lock);
+ WARN_ON(!list_empty(&ibcontext->per_mm_list));
+ mutex_unlock(&ibcontext->per_mm_list_lock);
+#endif
bfregi = &context->bfregi;
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+
+ if (context->devx_uid)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
@@ -1900,94 +1940,9 @@ static int get_extended_index(unsigned long offset)
return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
}
-static void mlx5_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA. This
- * is done through either mremap flow or split_vma (usually due to
- * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
- * as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original VMA size is exactly a single page, and therefore all
- * "splitting" operation will not happen to it.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx5_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
- * However need a sync with accessing the vma as part of
- * mlx5_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting.
- * The exiting case is handled explicitly as part of
- * mlx5_ib_disassociate_ucontext.
- */
- mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
-
- /* setting the vma context pointer to null in the mlx5_ib driver's
- * private data, to protect a race condition in
- * mlx5_ib_disassociate_ucontext().
- */
- mlx5_ib_vma_priv_data->vma = NULL;
- mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
- list_del(&mlx5_ib_vma_priv_data->list);
- mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
- kfree(mlx5_ib_vma_priv_data);
-}
-
-static const struct vm_operations_struct mlx5_ib_vm_ops = {
- .open = mlx5_ib_vma_open,
- .close = mlx5_ib_vma_close
-};
-
-static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx5_ib_ucontext *ctx)
-{
- struct mlx5_ib_vma_private_data *vma_prv;
- struct list_head *vma_head = &ctx->vma_private_list;
-
- vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
- if (!vma_prv)
- return -ENOMEM;
-
- vma_prv->vma = vma;
- vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
- vma->vm_private_data = vma_prv;
- vma->vm_ops = &mlx5_ib_vm_ops;
-
- mutex_lock(&ctx->vma_private_list_mutex);
- list_add(&vma_prv->list, vma_head);
- mutex_unlock(&ctx->vma_private_list_mutex);
-
- return 0;
-}
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- struct vm_area_struct *vma;
- struct mlx5_ib_vma_private_data *vma_private, *n;
- struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-
- mutex_lock(&context->vma_private_list_mutex);
- list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
- list) {
- vma = vma_private->vma;
- zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
- /* context going to be destroyed, should
- * not access ops any more.
- */
- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
- vma->vm_ops = NULL;
- list_del(&vma_private->list);
- kfree(vma_private);
- }
- mutex_unlock(&context->vma_private_list_mutex);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2010,9 +1965,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
- phys_addr_t pfn;
- int err;
-
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
@@ -2025,13 +1977,8 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
if (!dev->mdev->clock_info_page)
return -EOPNOTSUPP;
- pfn = page_to_pfn(dev->mdev->clock_info_page);
- err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
- vma->vm_page_prot);
- if (err)
- return err;
-
- return mlx5_ib_set_vma_data(vma, context);
+ return rdma_user_mmap_page(&context->ibucontext, vma,
+ dev->mdev->clock_info_page, PAGE_SIZE);
}
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
@@ -2121,21 +2068,15 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
pfn = uar_index2pfn(dev, uar_index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
- vma->vm_page_prot = prot;
- err = io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot);
+ err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
+ prot);
if (err) {
mlx5_ib_err(dev,
- "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+ "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
err, mmap_cmd2str(cmd));
- err = -EAGAIN;
goto err;
}
- err = mlx5_ib_set_vma_data(vma, context);
- if (err)
- goto err;
-
if (dyn_uar)
bfregi->sys_pages[idx] = uar_index;
return 0;
@@ -2160,7 +2101,6 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
size_t map_size = vma->vm_end - vma->vm_start;
u32 npages = map_size >> PAGE_SHIFT;
phys_addr_t pfn;
- pgprot_t prot;
if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
page_idx + npages)
@@ -2170,14 +2110,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
PAGE_SHIFT) +
page_idx;
- prot = pgprot_writecombine(vma->vm_page_prot);
- vma->vm_page_prot = prot;
-
- if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
- vma->vm_page_prot))
- return -EAGAIN;
-
- return mlx5_ib_set_vma_data(vma, mctx);
+ return rdma_user_mmap_io(context, vma, pfn, map_size,
+ pgprot_writecombine(vma->vm_page_prot));
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2318,21 +2252,30 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct mlx5_ib_alloc_pd_resp resp;
struct mlx5_ib_pd *pd;
int err;
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
+ u16 uid = 0;
pd = kmalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
- err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
+ uid = context ? to_mucontext(context)->devx_uid : 0;
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ MLX5_SET(alloc_pd_in, in, uid, uid);
+ err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
+ out, sizeof(out));
if (err) {
kfree(pd);
return ERR_PTR(err);
}
+ pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
+ pd->uid = uid;
if (context) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
+ mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
kfree(pd);
return ERR_PTR(-EFAULT);
}
@@ -2346,7 +2289,7 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
+ mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
kfree(mpd);
return 0;
@@ -2452,20 +2395,50 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action)
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
{
- struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
switch (maction->ib_action.type) {
case IB_FLOW_ACTION_ESP:
+ if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
+ return -EINVAL;
/* Currently only AES_GCM keymat is supported by the driver */
action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
+ action->action |= is_egress ?
MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
return 0;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_id = maction->flow_action_raw.action_id;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->reformat_id =
+ maction->flow_action_raw.action_id;
+ return 0;
+ }
+ /* fall through */
default:
return -EOPNOTSUPP;
}
@@ -2802,7 +2775,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(ib_spec, flow_attr, action);
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
if (ret)
return ret;
break;
@@ -2883,7 +2857,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
* rules would be supported, always return VALID_SPEC_NA.
*/
if (!is_crypto)
- return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
+ return VALID_SPEC_NA;
return is_crypto && is_ipsec &&
(!egress || (!is_drop && !flow_act->has_flow_tag)) ?
@@ -3026,14 +3000,15 @@ enum flow_table_type {
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
struct mlx5_ib_flow_prio *prio,
int priority,
- int num_entries, int num_groups)
+ int num_entries, int num_groups,
+ u32 flags)
{
struct mlx5_flow_table *ft;
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
num_entries,
num_groups,
- 0, 0);
+ 0, flags);
if (IS_ERR(ft))
return ERR_CAST(ft);
@@ -3053,26 +3028,43 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
int max_table_size;
int num_entries;
int num_groups;
+ u32 flags = 0;
int priority;
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (ft_type == MLX5_IB_FT_TX)
- priority = 0;
- else if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ enum mlx5_flow_namespace_type fn_type;
+
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
- ns = mlx5_get_flow_namespace(dev->mdev,
- ft_type == MLX5_IB_FT_TX ?
- MLX5_FLOW_NAMESPACE_EGRESS :
- MLX5_FLOW_NAMESPACE_BYPASS);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->rep &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->rep &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->rep &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@@ -3104,7 +3096,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
- return _get_prio(ns, prio, priority, num_entries, num_groups);
+ return _get_prio(ns, prio, priority, num_entries, num_groups,
+ flags);
return prio;
}
@@ -3271,6 +3264,9 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
+ if (dev->rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !spec) {
@@ -3658,34 +3654,54 @@ free_ucmd:
return ERR_PTR(err);
}
-static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
- int priority, bool mcast)
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ bool mcast)
{
- int max_table_size;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
+ int max_table_size;
+ u32 flags = 0;
+ int priority;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
if (max_table_size < MLX5_FS_MAX_ENTRIES)
return ERR_PTR(-ENOMEM);
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
- priority = ib_prio_to_core_prio(priority, false);
+ priority = ib_prio_to_core_prio(fs_matcher->priority, false);
- ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
+ ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
if (!ns)
return ERR_PTR(-ENOTSUPP);
- prio = &dev->flow_db->prios[priority];
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ prio = &dev->flow_db->prios[priority];
+ else
+ prio = &dev->flow_db->egress_prios[priority];
if (prio->flow_table)
return prio;
return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
- MLX5_FS_MAX_TYPES);
+ MLX5_FS_MAX_TYPES, flags);
}
static struct mlx5_ib_flow_handler *
@@ -3693,10 +3709,10 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen)
{
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
struct mlx5_flow_table *ft = ft_prio->flow_table;
int err = 0;
@@ -3715,9 +3731,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
fs_matcher->mask_len);
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act, dst, 1);
+ flow_act, dst, 1);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -3779,12 +3794,12 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen, int dest_id,
int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
- int priority = fs_matcher->priority;
struct mlx5_ib_flow_handler *handler;
bool mcast;
int err;
@@ -3802,7 +3817,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, priority, mcast);
+ ft_prio = _get_flow_table(dev, fs_matcher, mcast);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -3811,13 +3826,18 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
dst->type = dest_type;
dst->tir_num = dest_id;
- } else {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
dst->ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
- inlen);
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+ cmd_in, inlen);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
@@ -3995,6 +4015,9 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
*/
mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
break;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ mlx5_ib_destroy_flow_action_raw(maction);
+ break;
default:
WARN_ON(true);
break;
@@ -4009,13 +4032,17 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ u16 uid;
+
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
return -EOPNOTSUPP;
}
- err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
+ err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -4027,8 +4054,11 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
+ u16 uid;
- err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
+ err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -4049,16 +4079,17 @@ static int init_node_data(struct mlx5_ib_dev *dev)
return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}
-static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t fw_pages_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
}
+static DEVICE_ATTR_RO(fw_pages);
-static ssize_t show_reg_pages(struct device *device,
+static ssize_t reg_pages_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
@@ -4066,44 +4097,47 @@ static ssize_t show_reg_pages(struct device *device,
return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
+static DEVICE_ATTR_RO(reg_pages);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->mdev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
dev->mdev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
-static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
+static struct attribute *mlx5_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_fw_pages.attr,
+ &dev_attr_reg_pages.attr,
+ NULL,
+};
-static struct device_attribute *mlx5_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
- &dev_attr_fw_pages,
- &dev_attr_reg_pages,
+static const struct attribute_group mlx5_attr_group = {
+ .attrs = mlx5_class_attributes,
};
static void pkey_change_handler(struct work_struct *work)
@@ -5636,7 +5670,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- const char *name;
int err;
int i;
@@ -5669,12 +5702,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- if (!mlx5_lag_is_active(mdev))
- name = "mlx5_%d";
- else
- name = "mlx5_bond_%d";
-
- strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
@@ -5880,7 +5907,7 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
- mutex_init(&dev->lb_mutex);
+ mutex_init(&dev->lb.mutex);
return 0;
}
@@ -6087,7 +6114,14 @@ static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
- return ib_register_device(&dev->ib_dev, NULL);
+ const char *name;
+
+ rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
+ if (!mlx5_lag_is_active(dev->mdev))
+ name = "mlx5_%d";
+ else
+ name = "mlx5_bond_%d";
+ return ib_register_device(&dev->ib_dev, name, NULL);
}
void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
@@ -6117,21 +6151,6 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
-{
- int err;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
- err = device_create_file(&dev->ib_dev.dev,
- mlx5_class_attributes[i]);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
{
mlx5_ib_register_vport_reps(dev);
@@ -6155,6 +6174,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
profile->stage[stage].cleanup(dev);
}
+ if (dev->devx_whitelist_uid)
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
ib_dealloc_device((struct ib_device *)dev);
}
@@ -6163,8 +6184,7 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
{
int err;
int i;
-
- printk_once(KERN_INFO "%s", mlx5_version);
+ int uid;
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
@@ -6174,6 +6194,10 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
}
+ uid = mlx5_ib_devx_create(dev);
+ if (uid > 0)
+ dev->devx_whitelist_uid = uid;
+
dev->profile = profile;
dev->ib_active = true;
@@ -6234,9 +6258,6 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
};
static const struct mlx5_ib_profile nic_rep_profile = {
@@ -6279,9 +6300,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
mlx5_ib_stage_rep_reg_init,
mlx5_ib_stage_rep_reg_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index f3dbd75a0a96..549234988bb4 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -57,7 +57,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
int entry;
unsigned long page_shift = umem->page_shift;
- if (umem->odp_data) {
+ if (umem->is_odp) {
*ncont = ib_umem_page_count(umem);
*count = *ncont << (page_shift - PAGE_SHIFT);
*shift = page_shift;
@@ -152,14 +152,13 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
struct scatterlist *sg;
int entry;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- const bool odp = umem->odp_data != NULL;
-
- if (odp) {
+ if (umem->is_odp) {
WARN_ON(shift != 0);
WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
for (i = 0; i < num_pages; ++i) {
- dma_addr_t pa = umem->odp_data->dma_list[offset + i];
+ dma_addr_t pa =
+ to_ib_umem_odp(umem)->dma_list[offset + i];
pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 320d4dfe8c2f..8444ea78229a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -39,8 +39,10 @@
#include <rdma/ib_smi.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
+#include <linux/mlx5/fs.h>
#include <linux/types.h>
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
@@ -48,17 +50,17 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
-#define mlx5_ib_dbg(dev, format, arg...) \
-pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_dbg(_dev, format, arg...) \
+ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
-#define mlx5_ib_err(dev, format, arg...) \
-pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_err(_dev, format, arg...) \
+ dev_err(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
-#define mlx5_ib_warn(dev, format, arg...) \
-pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_warn(_dev, format, arg...) \
+ dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
#define field_avail(type, fld, sz) (offsetof(type, fld) + \
sizeof(((type *)0)->fld) <= (sz))
@@ -114,13 +116,6 @@ enum {
MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
};
-struct mlx5_ib_vma_private_data {
- struct list_head list;
- struct vm_area_struct *vma;
- /* protect vma_private_list add/del */
- struct mutex *vma_private_list_mutex;
-};
-
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@@ -132,13 +127,12 @@ struct mlx5_ib_ucontext {
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
- struct list_head vma_private_list;
- /* protect vma_private_list add/del */
- struct mutex vma_private_list_mutex;
u64 lib_caps;
DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
u16 devx_uid;
+ /* For RoCE LAG TX affinity */
+ atomic_t tx_port_affinity;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -149,6 +143,13 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte
struct mlx5_ib_pd {
struct ib_pd ibpd;
u32 pdn;
+ u16 uid;
+};
+
+enum {
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER,
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT,
+ MLX5_IB_FLOW_ACTION_DECAP,
};
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
@@ -180,6 +181,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_ib_match_params matcher_mask;
int mask_len;
enum mlx5_ib_flow_type flow_type;
+ enum mlx5_flow_namespace_type ns_type;
u16 priority;
struct mlx5_core_dev *mdev;
atomic_t usecnt;
@@ -188,6 +190,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
struct mlx5_flow_table *lag_demux_ft;
@@ -322,6 +325,7 @@ enum {
struct mlx5_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_rwq_ind_tbl;
u32 rqtn;
+ u16 uid;
};
struct mlx5_ib_ubuffer {
@@ -428,7 +432,7 @@ struct mlx5_ib_qp {
struct list_head cq_send_list;
struct mlx5_rate_limit rl;
u32 underlay_qpn;
- bool tunnel_offload_en;
+ u32 flags_en;
/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
enum ib_qp_type qp_sub_type;
};
@@ -535,6 +539,7 @@ struct mlx5_ib_srq {
struct mlx5_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
+ u16 uid;
};
enum mlx5_ib_mtt_access_flags {
@@ -699,7 +704,7 @@ struct mlx5_roce {
rwlock_t netdev_lock;
struct net_device *netdev;
struct notifier_block nb;
- atomic_t next_port;
+ atomic_t tx_port_affinity;
enum ib_port_state last_port_state;
struct mlx5_ib_dev *dev;
u8 native_port_num;
@@ -814,6 +819,11 @@ struct mlx5_ib_flow_action {
u64 ib_flags;
struct mlx5_accel_esp_xfrm *ctx;
} esp_aes_gcm;
+ struct {
+ struct mlx5_ib_dev *dev;
+ u32 sub_type;
+ u32 action_id;
+ } flow_action_raw;
};
};
@@ -858,9 +868,20 @@ to_mcounters(struct ib_counters *ibcntrs)
return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
}
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action);
+struct mlx5_ib_lb_state {
+ /* protect the user_td */
+ struct mutex mutex;
+ u32 user_td;
+ int qps;
+ bool enabled;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
- const struct uverbs_object_tree_def *driver_trees[6];
+ const struct uverbs_object_tree_def *driver_trees[7];
struct mlx5_core_dev *mdev;
struct mlx5_roce roce[MLX5_MAX_PORTS];
int num_ports;
@@ -899,13 +920,12 @@ struct mlx5_ib_dev {
const struct mlx5_ib_profile *profile;
struct mlx5_eswitch_rep *rep;
- /* protect the user_td */
- struct mutex lb_mutex;
- u32 user_td;
+ struct mlx5_ib_lb_state lb;
u8 umr_fence;
struct list_head ib_dev_list;
u64 sys_image_guid;
struct mlx5_memic memic;
+ u16 devx_whitelist_uid;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1016,6 +1036,8 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
int mlx5_ib_destroy_srq(struct ib_srq *srq);
int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
@@ -1140,7 +1162,7 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
unsigned long end);
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
@@ -1179,7 +1201,6 @@ void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage);
@@ -1228,22 +1249,20 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context);
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- void *cmd_in, int inlen, int dest_id, int dest_type);
+ struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
+ int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
+void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
#else
static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; };
-static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context) {}
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
static inline const struct uverbs_object_tree_def *
mlx5_ib_get_devx_tree(void) { return NULL; }
static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
@@ -1256,6 +1275,11 @@ mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
{
return 0;
}
+static inline void
+mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ return;
+};
#endif
static inline void init_query_mad(struct ib_smp *mad)
{
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9fb1d9cb9401..9b195d65a13e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -98,7 +98,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
- if (mr->umem->odp_data) {
+ if (mr->umem->is_odp) {
/*
* This barrier prevents the compiler from moving the
* setting of umem->odp_data->private to point to our
@@ -107,7 +107,7 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
* handle invalidations.
*/
smp_wmb();
- mr->umem->odp_data->private = mr;
+ to_ib_umem_odp(mr->umem)->private = mr;
/*
* Make sure we will see the new
* umem->odp_data->private value in the invalidation
@@ -544,6 +544,9 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
int shrink = 0;
int c;
+ if (!mr->allocated_from_cache)
+ return;
+
c = order2idx(dev, mr->order);
if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
@@ -688,7 +691,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- queue_work(cache->wq, &ent->work);
if (i > MR_CACHE_LAST_STD_ENTRY) {
mlx5_odp_init_mr_cache_entry(ent);
@@ -708,6 +710,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
ent->limit = 0;
+ queue_work(cache->wq, &ent->work);
}
err = mlx5_mr_cache_debugfs_init(dev);
@@ -1624,14 +1627,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
struct ib_umem *umem = mr->umem;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (umem && umem->odp_data) {
+ if (umem && umem->is_odp) {
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
+
/* Prevent new page faults from succeeding */
mr->live = 0;
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
- if (umem->odp_data->page_list)
- mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+ if (umem_odp->page_list)
+ mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
else
mlx5_ib_free_implicit_mr(mr);
@@ -1647,18 +1652,19 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
umem = NULL;
}
#endif
-
clean_mr(dev, mr);
+ /*
+ * We should unregister the DMA address from the HCA before
+ * remove the DMA mapping.
+ */
+ mlx5_mr_cache_free(dev, mr);
if (umem) {
ib_umem_release(umem);
atomic_sub(npages, &dev->mdev->priv.reg_pages);
}
-
if (!mr->allocated_from_cache)
kfree(mr);
- else
- mlx5_mr_cache_free(dev, mr);
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d216e0d2921d..b04eb6775326 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -61,13 +61,21 @@ static int check_parent(struct ib_umem_odp *odp,
return mr && mr->parent == parent && !odp->dying;
}
+struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
+{
+ if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp))
+ return NULL;
+
+ return to_ib_umem_odp(mr->umem)->per_mm;
+}
+
static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
{
struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
- struct ib_ucontext *ctx = odp->umem->context;
+ struct ib_ucontext_per_mm *per_mm = odp->per_mm;
struct rb_node *rb;
- down_read(&ctx->umem_rwsem);
+ down_read(&per_mm->umem_rwsem);
while (1) {
rb = rb_next(&odp->interval_tree.rb);
if (!rb)
@@ -79,19 +87,19 @@ static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
not_found:
odp = NULL;
end:
- up_read(&ctx->umem_rwsem);
+ up_read(&per_mm->umem_rwsem);
return odp;
}
-static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
- u64 start, u64 length,
+static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
struct mlx5_ib_mr *parent)
{
+ struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent);
struct ib_umem_odp *odp;
struct rb_node *rb;
- down_read(&ctx->umem_rwsem);
- odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+ down_read(&per_mm->umem_rwsem);
+ odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length);
if (!odp)
goto end;
@@ -102,13 +110,13 @@ static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
if (!rb)
goto not_found;
odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (ib_umem_start(odp->umem) > start + length)
+ if (ib_umem_start(&odp->umem) > start + length)
goto not_found;
}
not_found:
odp = NULL;
end:
- up_read(&ctx->umem_rwsem);
+ up_read(&per_mm->umem_rwsem);
return odp;
}
@@ -116,7 +124,6 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr, int flags)
{
struct ib_pd *pd = mr->ibmr.pd;
- struct ib_ucontext *ctx = pd->uobject->context;
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem_odp *odp;
unsigned long va;
@@ -131,13 +138,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
return;
}
- odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
- nentries * MLX5_IMR_MTT_SIZE, mr);
+ odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
+ nentries * MLX5_IMR_MTT_SIZE, mr);
for (i = 0; i < nentries; i++, pklm++) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
va = (offset + i) * MLX5_IMR_MTT_SIZE;
- if (odp && odp->umem->address == va) {
+ if (odp && odp->umem.address == va) {
struct mlx5_ib_mr *mtt = odp->private;
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
@@ -153,13 +160,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
static void mr_leaf_free_action(struct work_struct *work)
{
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
- int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- ib_umem_release(odp->umem);
+ ib_umem_release(&odp->umem);
if (imr->live)
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
@@ -170,22 +177,24 @@ static void mr_leaf_free_action(struct work_struct *work)
wake_up(&imr->q_leaf_free);
}
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
unsigned long end)
{
struct mlx5_ib_mr *mr;
const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
+ struct ib_umem *umem;
int in_block = 0;
u64 addr;
- if (!umem || !umem->odp_data) {
+ if (!umem_odp) {
pr_err("invalidation called on NULL umem or non-ODP umem\n");
return;
}
+ umem = &umem_odp->umem;
- mr = umem->odp_data->private;
+ mr = umem_odp->private;
if (!mr || !mr->ibmr.pd)
return;
@@ -208,7 +217,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* estimate the cost of another UMR vs. the cost of bigger
* UMR.
*/
- if (umem->odp_data->dma_list[idx] &
+ if (umem_odp->dma_list[idx] &
(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
if (!in_block) {
blk_start_idx = idx;
@@ -237,13 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* needed.
*/
- ib_umem_odp_unmap_dma_pages(umem, start, end);
+ ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
if (unlikely(!umem->npages && mr->parent &&
- !umem->odp_data->dying)) {
- WRITE_ONCE(umem->odp_data->dying, 1);
+ !umem_odp->dying)) {
+ WRITE_ONCE(umem_odp->dying, 1);
atomic_inc(&mr->parent->num_leaf_free);
- schedule_work(&umem->odp_data->work);
+ schedule_work(&umem_odp->work);
}
}
@@ -366,16 +375,15 @@ fail:
static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
u64 io_virt, size_t bcnt)
{
- struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
struct ib_umem_odp *odp, *result = NULL;
+ struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
u64 addr = io_virt & MLX5_IMR_MTT_MASK;
int nentries = 0, start_idx = 0, ret;
struct mlx5_ib_mr *mtt;
- struct ib_umem *umem;
- mutex_lock(&mr->umem->odp_data->umem_mutex);
- odp = odp_lookup(ctx, addr, 1, mr);
+ mutex_lock(&odp_mr->umem_mutex);
+ odp = odp_lookup(addr, 1, mr);
mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
io_virt, bcnt, addr, odp);
@@ -385,22 +393,23 @@ next_mr:
if (nentries)
nentries++;
} else {
- umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
- if (IS_ERR(umem)) {
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
- return ERR_CAST(umem);
+ odp = ib_alloc_odp_umem(odp_mr->per_mm, addr,
+ MLX5_IMR_MTT_SIZE);
+ if (IS_ERR(odp)) {
+ mutex_unlock(&odp_mr->umem_mutex);
+ return ERR_CAST(odp);
}
- mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+ mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+ mr->access_flags);
if (IS_ERR(mtt)) {
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
- ib_umem_release(umem);
+ mutex_unlock(&odp_mr->umem_mutex);
+ ib_umem_release(&odp->umem);
return ERR_CAST(mtt);
}
- odp = umem->odp_data;
odp->private = mtt;
- mtt->umem = umem;
+ mtt->umem = &odp->umem;
mtt->mmkey.iova = addr;
mtt->parent = mr;
INIT_WORK(&odp->work, mr_leaf_free_action);
@@ -417,7 +426,7 @@ next_mr:
addr += MLX5_IMR_MTT_SIZE;
if (unlikely(addr < io_virt + bcnt)) {
odp = odp_next(odp);
- if (odp && odp->umem->address != addr)
+ if (odp && odp->umem.address != addr)
odp = NULL;
goto next_mr;
}
@@ -432,7 +441,7 @@ next_mr:
}
}
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ mutex_unlock(&odp_mr->umem_mutex);
return result;
}
@@ -460,36 +469,36 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
return imr;
}
-static int mr_leaf_free(struct ib_umem *umem, u64 start,
- u64 end, void *cookie)
+static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
+ void *cookie)
{
- struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+ struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
+ struct ib_umem *umem = &umem_odp->umem;
if (mr->parent != imr)
return 0;
- ib_umem_odp_unmap_dma_pages(umem,
- ib_umem_start(umem),
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
- if (umem->odp_data->dying)
+ if (umem_odp->dying)
return 0;
- WRITE_ONCE(umem->odp_data->dying, 1);
+ WRITE_ONCE(umem_odp->dying, 1);
atomic_inc(&imr->num_leaf_free);
- schedule_work(&umem->odp_data->work);
+ schedule_work(&umem_odp->work);
return 0;
}
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
- struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+ struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
- down_read(&ctx->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+ down_read(&per_mm->umem_rwsem);
+ rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
mr_leaf_free, true, imr);
- up_read(&ctx->umem_rwsem);
+ up_read(&per_mm->umem_rwsem);
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
@@ -497,6 +506,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
u64 io_virt, size_t bcnt, u32 *bytes_mapped)
{
+ struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
u64 access_mask = ODP_READ_ALLOWED_BIT;
int npages = 0, page_shift, np;
u64 start_idx, page_mask;
@@ -505,7 +515,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
size_t size;
int ret;
- if (!mr->umem->odp_data->page_list) {
+ if (!odp_mr->page_list) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
if (IS_ERR(odp))
@@ -513,11 +523,11 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
mr = odp->private;
} else {
- odp = mr->umem->odp_data;
+ odp = odp_mr;
}
next_mr:
- size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+ size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
page_shift = mr->umem->page_shift;
page_mask = ~(BIT(page_shift) - 1);
@@ -533,7 +543,7 @@ next_mr:
*/
smp_rmb();
- ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+ ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
access_mask, current_seq);
if (ret < 0)
@@ -542,7 +552,8 @@ next_mr:
np = ret;
mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
+ current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
@@ -575,7 +586,7 @@ next_mr:
io_virt += size;
next = odp_next(odp);
- if (unlikely(!next || next->umem->address != io_virt)) {
+ if (unlikely(!next || next->umem.address != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
return -EAGAIN;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index daf1eb84cd31..829aec3aba8f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -37,6 +37,7 @@
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
+#include "cmd.h"
/* not supported currently */
static int wq_signature;
@@ -850,6 +851,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
+ MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -1059,11 +1061,13 @@ static int is_connected(enum ib_qp_type qp_type)
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
- struct mlx5_ib_sq *sq, u32 tdn)
+ struct mlx5_ib_sq *sq, u32 tdn,
+ struct ib_pd *pd)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+ MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
if (qp->flags & MLX5_IB_QP_UNDERLAY)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
@@ -1072,9 +1076,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
}
static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
- struct mlx5_ib_sq *sq)
+ struct mlx5_ib_sq *sq, struct ib_pd *pd)
{
- mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+ mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
}
static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
@@ -1114,6 +1118,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
goto err_umem;
}
+ MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
@@ -1188,7 +1193,7 @@ static size_t get_rq_pas_size(void *qpc)
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin,
- size_t qpinlen)
+ size_t qpinlen, struct ib_pd *pd)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
@@ -1209,6 +1214,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
MLX5_SET(rqc, rqc, vsd, 1);
@@ -1256,10 +1262,23 @@ static bool tunnel_offload_supported(struct mlx5_core_dev *dev)
MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx));
}
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u32 qp_flags_en,
+ struct ib_pd *pd)
+{
+ if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+ mlx5_ib_disable_lb(dev, false, true);
+ mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
+}
+
static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, u32 tdn,
- bool tunnel_offload_en)
+ u32 *qp_flags_en,
+ struct ib_pd *pd)
{
+ u8 lb_flag = 0;
u32 *in;
void *tirc;
int inlen;
@@ -1270,33 +1289,45 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
MLX5_SET(tirc, tirc, transport_domain, tdn);
- if (tunnel_offload_en)
+ if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
- if (dev->rep)
- MLX5_SET(tirc, tirc, self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
+ if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+
+ if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+
+ if (dev->rep) {
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+ *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+ }
+
+ MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+ if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+ err = mlx5_ib_enable_lb(dev, false, true);
+
+ if (err)
+ destroy_raw_packet_qp_tir(dev, rq, 0, pd);
+ }
kvfree(in);
return err;
}
-static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq)
-{
- mlx5_core_destroy_tir(dev->mdev, rq->tirn);
-}
-
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 *in, size_t inlen,
- struct ib_pd *pd)
+ struct ib_pd *pd,
+ struct ib_udata *udata,
+ struct mlx5_ib_create_qp_resp *resp)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
@@ -1306,9 +1337,10 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
int err;
u32 tdn = mucontext->tdn;
+ u16 uid = to_mpd(pd)->uid;
if (qp->sq.wqe_cnt) {
- err = create_raw_packet_qp_tis(dev, qp, sq, tdn);
+ err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
if (err)
return err;
@@ -1316,6 +1348,13 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (err)
goto err_destroy_tis;
+ if (uid) {
+ resp->tisn = sq->tisn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
+ resp->sqn = sq->base.mqp.qpn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
+ }
+
sq->base.container_mibqp = qp;
sq->base.mqp.event = mlx5_ib_qp_event;
}
@@ -1327,22 +1366,32 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in, inlen);
+ err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
if (err)
goto err_destroy_sq;
-
- err = create_raw_packet_qp_tir(dev, rq, tdn,
- qp->tunnel_offload_en);
+ err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd);
if (err)
goto err_destroy_rq;
+
+ if (uid) {
+ resp->rqn = rq->base.mqp.qpn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
+ resp->tirn = rq->tirn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ }
}
qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
rq->base.mqp.qpn;
+ err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
+ if (err)
+ goto err_destroy_tir;
return 0;
+err_destroy_tir:
+ destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd);
err_destroy_rq:
destroy_raw_packet_qp_rq(dev, rq);
err_destroy_sq:
@@ -1350,7 +1399,7 @@ err_destroy_sq:
return err;
destroy_raw_packet_qp_sq(dev, sq);
err_destroy_tis:
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, pd);
return err;
}
@@ -1363,13 +1412,13 @@ static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
if (qp->rq.wqe_cnt) {
- destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
destroy_raw_packet_qp_rq(dev, rq);
}
if (qp->sq.wqe_cnt) {
destroy_raw_packet_qp_sq(dev, sq);
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
}
}
@@ -1387,7 +1436,11 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
- mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
+ if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+ mlx5_ib_disable_lb(dev, false, true);
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+ to_mpd(qp->ibqp.pd)->uid);
}
static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
@@ -1410,6 +1463,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 tdn = mucontext->tdn;
struct mlx5_ib_create_qp_rss ucmd = {};
size_t required_cmd_sz;
+ u8 lb_flag = 0;
if (init_attr->qp_type != IB_QPT_RAW_PACKET)
return -EOPNOTSUPP;
@@ -1444,7 +1498,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EOPNOTSUPP;
}
- if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
+ if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) {
mlx5_ib_dbg(dev, "invalid flags\n");
return -EOPNOTSUPP;
}
@@ -1461,6 +1517,16 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EOPNOTSUPP;
}
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) {
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+ }
+
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
+ }
+
err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (err) {
mlx5_ib_dbg(dev, "copy failed\n");
@@ -1472,6 +1538,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_INDIRECT);
@@ -1484,6 +1551,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
+
if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER)
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
else
@@ -1580,21 +1649,36 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
- if (dev->rep)
- MLX5_SET(tirc, tirc, self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
-
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
+ if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+ err = mlx5_ib_enable_lb(dev, false, true);
+
+ if (err)
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+ to_mpd(pd)->uid);
+ }
+
if (err)
goto err;
+ if (mucontext->devx_uid) {
+ resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ resp.tirn = qp->rss_qp.tirn;
+ }
+
+ err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
+ if (err)
+ goto err_copy;
+
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
qp->flags |= MLX5_IB_QP_RSS;
return 0;
+err_copy:
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid);
err:
kvfree(in);
return err;
@@ -1710,7 +1794,23 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n");
return -EOPNOTSUPP;
}
- qp->tunnel_offload_en = true;
+ qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
+ }
+
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) {
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+ }
+
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
}
if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
@@ -1911,7 +2011,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
- err = create_raw_packet_qp(dev, qp, in, inlen, pd);
+ err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
+ &resp);
} else {
err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
}
@@ -2192,6 +2293,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
goto err_free;
}
+ MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
qp->qp_sub_type = MLX5_IB_QPT_DCT;
MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
@@ -2458,7 +2560,8 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 sl)
+ struct mlx5_ib_sq *sq, u8 sl,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2471,6 +2574,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
@@ -2483,7 +2587,8 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
}
static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 tx_affinity)
+ struct mlx5_ib_sq *sq, u8 tx_affinity,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2496,6 +2601,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
@@ -2580,7 +2686,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
return modify_raw_packet_eth_prio(dev->mdev,
&qp->raw_packet_qp.sq,
- sl & 0xf);
+ sl & 0xf, qp->ibqp.pd);
return 0;
}
@@ -2728,9 +2834,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
return result;
}
-static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, int new_state,
- const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_rq(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
void *in;
void *rqc;
@@ -2743,6 +2849,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
return -ENOMEM;
MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(rqc, rqc, state, new_state);
@@ -2753,8 +2860,9 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
} else
- pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
- dev->ib_dev.name);
+ dev_info_once(
+ &dev->ib_dev.dev,
+ "RAW PACKET QP counters are not supported on current FW\n");
}
err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
@@ -2768,10 +2876,9 @@ out:
return err;
}
-static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq,
- int new_state,
- const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_sq(
+ struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
struct mlx5_rate_limit old_rl = ibqp->rl;
@@ -2788,6 +2895,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
if (!in)
return -ENOMEM;
+ MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(modify_sq_in, in, sq_state, sq->state);
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
@@ -2890,7 +2998,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
if (modify_rq) {
- err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+ err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
+ qp->ibqp.pd);
if (err)
return err;
}
@@ -2898,17 +3007,50 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (modify_sq) {
if (tx_affinity) {
err = modify_raw_packet_tx_affinity(dev->mdev, sq,
- tx_affinity);
+ tx_affinity,
+ qp->ibqp.pd);
if (err)
return err;
}
- return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param);
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
+ raw_qp_param, qp->ibqp.pd);
}
return 0;
}
+static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_pd *pd,
+ struct mlx5_ib_qp_base *qp_base,
+ u8 port_num)
+{
+ struct mlx5_ib_ucontext *ucontext = NULL;
+ unsigned int tx_port_affinity;
+
+ if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context)
+ ucontext = to_mucontext(pd->ibpd.uobject->context);
+
+ if (ucontext) {
+ tx_port_affinity = (unsigned int)atomic_add_return(
+ 1, &ucontext->tx_port_affinity) %
+ MLX5_MAX_PORTS +
+ 1;
+ mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
+ tx_port_affinity, qp_base->mqp.qpn, ucontext);
+ } else {
+ tx_port_affinity =
+ (unsigned int)atomic_add_return(
+ 1, &dev->roce[port_num].tx_port_affinity) %
+ MLX5_MAX_PORTS +
+ 1;
+ mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
+ tx_port_affinity, qp_base->mqp.qpn);
+ }
+
+ return tx_port_affinity;
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state,
@@ -2974,6 +3116,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (!context)
return -ENOMEM;
+ pd = get_pd(qp);
context->flags = cpu_to_be32(mlx5_st << 16);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
@@ -3002,9 +3145,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
(ibqp->qp_type == IB_QPT_XRC_TGT)) {
if (mlx5_lag_is_active(dev->mdev)) {
u8 p = mlx5_core_native_port_num(dev->mdev);
- tx_affinity = (unsigned int)atomic_add_return(1,
- &dev->roce[p].next_port) %
- MLX5_MAX_PORTS + 1;
+ tx_affinity = get_tx_affinity(dev, pd, base, p);
context->flags |= cpu_to_be32(tx_affinity << 24);
}
}
@@ -3062,7 +3203,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
goto out;
}
- pd = get_pd(qp);
get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
@@ -3243,7 +3383,9 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
int req = IB_QP_STATE;
int opt = 0;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ if (new_state == IB_QPS_RESET) {
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -3367,7 +3509,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
size_t required_cmd_sz;
int err = -EINVAL;
int port;
- enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
@@ -3413,7 +3554,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
}
if (qp->flags & MLX5_IB_QP_UNDERLAY) {
@@ -3424,7 +3564,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
} else if (qp_type != MLX5_IB_QPT_REG_UMR &&
qp_type != MLX5_IB_QPT_DCI &&
- !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
+ !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
+ attr_mask)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
cur_state, new_state, ibqp->qp_type, attr_mask);
goto out;
@@ -4371,6 +4512,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
u8 next_fence = 0;
u8 fence;
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
@@ -4380,13 +4527,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
- err = -EIO;
- *bad_wr = wr;
- nreq = 0;
- goto out;
- }
-
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
@@ -4700,18 +4840,17 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int ind;
int i;
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
spin_lock_irqsave(&qp->rq.lock, flags);
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
- err = -EIO;
- *bad_wr = wr;
- nreq = 0;
- goto out;
- }
-
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -5175,6 +5314,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_xrcd *xrcd;
int err;
+ u16 uid;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
return ERR_PTR(-ENOSYS);
@@ -5183,12 +5323,14 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
if (!xrcd)
return ERR_PTR(-ENOMEM);
- err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
+ uid = context ? to_mucontext(context)->devx_uid : 0;
+ err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
if (err) {
kfree(xrcd);
return ERR_PTR(-ENOMEM);
}
+ xrcd->uid = uid;
return &xrcd->ibxrcd;
}
@@ -5196,9 +5338,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
+ u16 uid = to_mxrcd(xrcd)->uid;
int err;
- err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
+ err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
if (err)
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
@@ -5268,6 +5411,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, mem_rq_type,
MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
@@ -5443,8 +5587,7 @@ static int prepare_user_rq(struct ib_pd *pd,
err = create_user_rq(dev, pd, rwq, &ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
- if (err)
- return err;
+ return err;
}
rwq->user_index = ucmd.user_index;
@@ -5573,6 +5716,9 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
for (i = 0; i < sz; i++)
MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
+ rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
+ MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
+
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
@@ -5591,7 +5737,7 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
return &rwq_ind_tbl->ib_rwq_ind_tbl;
err_copy:
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+ mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
err:
kfree(rwq_ind_tbl);
return ERR_PTR(err);
@@ -5602,7 +5748,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+ mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
kfree(rwq_ind_tbl);
return 0;
@@ -5653,6 +5799,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
if (wq_state == IB_WQS_ERR)
wq_state = MLX5_RQC_STATE_ERR;
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
MLX5_SET(rqc, rqc, state, wq_state);
if (wq_attr_mask & IB_WQ_FLAGS) {
@@ -5684,8 +5831,9 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
MLX5_SET(rqc, rqc, counter_set_id,
dev->port->cnts.set_id);
} else
- pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
- dev->ib_dev.name);
+ dev_info_once(
+ &dev->ib_dev.dev,
+ "Receive WQ counters are not supported on current FW\n");
}
err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index d359fecf7a5b..d012e7dbcc38 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -144,6 +144,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
+ in->uid = to_mpd(pd)->uid;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
in->user_index = uidx;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 093f7755c843..2e5dc0a67cfc 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -58,8 +58,9 @@ static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
ret = ib_query_port(&dev->ib_dev, port_num, tprops);
if (ret) {
- printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
- ret, dev->ib_dev.name, port_num);
+ dev_warn(&dev->ib_dev.dev,
+ "ib_query_port failed (%d) forport %d\n", ret,
+ port_num);
goto out;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index f3e80dec1334..92c49bff22bc 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -986,7 +986,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
goto err_free_dev;
}
- if (mthca_cmd_init(mdev)) {
+ err = mthca_cmd_init(mdev);
+ if (err) {
mthca_err(mdev, "Failed to init command interface, aborting.\n");
goto err_free_dev;
}
@@ -1014,8 +1015,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
err = mthca_setup_hca(mdev);
if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
- if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_free_irq_vectors(pdev);
+ pci_free_irq_vectors(pdev);
mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
err = mthca_setup_hca(mdev);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0d3473b4596e..691c6f048938 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1076,16 +1076,17 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
return err;
}
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
@@ -1103,23 +1104,26 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
return sprintf(buf, "unknown\n");
}
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *mthca_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *mthca_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group mthca_attr_group = {
+ .attrs = mthca_dev_attributes,
};
static int mthca_init_node_data(struct mthca_dev *dev)
@@ -1192,13 +1196,11 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
- int i;
ret = mthca_init_node_data(dev);
if (ret)
return ret;
- strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
@@ -1296,20 +1298,12 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
+ rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA;
- ret = ib_register_device(&dev->ib_dev, NULL);
+ ret = ib_register_device(&dev->ib_dev, "mthca%d", NULL);
if (ret)
return ret;
- for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
- ret = device_create_file(&dev->ib_dev.dev,
- mthca_dev_attributes[i]);
- if (ret) {
- ib_unregister_device(&dev->ib_dev);
- return ret;
- }
- }
-
mthca_start_catas_poll(dev);
return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 3d37f2373d63..9d178ee3c96a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -872,8 +872,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 42b68aa999fc..e00add6d78ec 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -456,9 +456,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
void __iomem *mmio_regs = NULL;
u8 hw_rev;
- assert(pcidev != NULL);
- assert(ent != NULL);
-
printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
DRV_VERSION, pci_name(pcidev));
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bedaa02749fb..a895fe980d10 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -149,18 +149,9 @@ do { \
printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
} while (0)
-#define assert(expr) \
-do { \
- if (!(expr)) { \
- printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-
#define NES_EVENT_TIMEOUT 1200000
#else
#define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
-#define assert(expr) do {} while (0)
#define NES_EVENT_TIMEOUT 100000
#endif
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index bd0675d8f298..5517e392bc01 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1443,7 +1443,7 @@ static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_inde
mdelay(1);
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- } while ((temp_phy_data2 == temp_phy_data));
+ } while (temp_phy_data2 == temp_phy_data);
/* wait for tracking */
counter = 0;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 61014e251555..16f33454c198 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -146,8 +146,6 @@ static int nes_netdev_open(struct net_device *netdev)
struct list_head *list_pos, *list_temp;
unsigned long flags;
- assert(nesdev != NULL);
-
if (nesvnic->netdev_open == 1)
return 0;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 6940c7215961..92d1cadd4cfd 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -687,7 +687,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
}
nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
- nespd, nesvnic->nesibdev->ibdev.name);
+ nespd, dev_name(&nesvnic->nesibdev->ibdev.dev));
nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
@@ -2556,8 +2556,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
/**
* show_rev
*/
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct nes_ib_device *nesibdev =
container_of(dev, struct nes_ib_device, ibdev.dev);
@@ -2566,40 +2566,40 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
nes_debug(NES_DBG_INIT, "\n");
return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev);
}
-
+static DEVICE_ATTR_RO(hw_rev);
/**
* show_hca
*/
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
nes_debug(NES_DBG_INIT, "\n");
return sprintf(buf, "NES020\n");
}
-
+static DEVICE_ATTR_RO(hca_type);
/**
* show_board
*/
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
nes_debug(NES_DBG_INIT, "\n");
return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");
}
+static DEVICE_ATTR_RO(board_id);
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *nes_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static struct attribute *nes_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
};
+static const struct attribute_group nes_attr_group = {
+ .attrs = nes_dev_attributes,
+};
/**
* nes_query_qp
@@ -3640,7 +3640,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
if (nesibdev == NULL) {
return NULL;
}
- strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX);
nesibdev->ibdev.owner = THIS_MODULE;
nesibdev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -3795,10 +3794,11 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
struct nes_vnic *nesvnic = nesibdev->nesvnic;
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
- int i, ret;
+ int ret;
+ rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group);
nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES;
- ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
+ ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d", NULL);
if (ret) {
return ret;
}
@@ -3809,19 +3809,6 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
- for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
- ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
- if (ret) {
- while (i > 0) {
- i--;
- device_remove_file(&nesibdev->ibdev.dev,
- nes_dev_attributes[i]);
- }
- ib_unregister_device(&nesibdev->ibdev);
- return ret;
- }
- }
-
nesvnic->of_device_registered = 1;
return 0;
@@ -3834,15 +3821,9 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
{
struct nes_vnic *nesvnic = nesibdev->nesvnic;
- int i;
- for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
- device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
- }
-
- if (nesvnic->of_device_registered) {
+ if (nesvnic->of_device_registered)
ib_unregister_device(&nesibdev->ibdev);
- }
nesvnic->of_device_registered = 0;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index e578281471af..241a57a07485 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -792,7 +792,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
qp->srq->ibsrq.
srq_context);
} else if (dev_event) {
- pr_err("%s: Fatal event received\n", dev->ibdev.name);
+ dev_err(&dev->ibdev.dev, "Fatal event received\n");
ib_dispatch_event(&ib_evt);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7832ee3e0c84..873cc7f6fe61 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -114,9 +114,37 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]);
}
+/* OCRDMA sysfs interface */
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *ocrdma_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group ocrdma_attr_group = {
+ .attrs = ocrdma_attributes,
+};
+
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
- strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
@@ -213,8 +241,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.destroy_srq = ocrdma_destroy_srq;
dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
}
+ rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
- return ib_register_device(&dev->ibdev, NULL);
+ return ib_register_device(&dev->ibdev, "ocrdma%d", NULL);
}
static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
@@ -260,42 +289,9 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
kfree(dev->cq_tbl);
}
-/* OCRDMA sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *ocrdma_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
-static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
- device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
-}
-
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
- int status = 0, i;
+ int status = 0;
u8 lstate = 0;
struct ocrdma_dev *dev;
@@ -331,9 +327,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (!status)
ocrdma_update_link_state(dev, lstate);
- for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
- if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
- goto sysfs_err;
/* Init stats */
ocrdma_add_port_stats(dev);
/* Interrupt Moderation */
@@ -348,8 +341,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
dev_name(&dev->nic_info.pdev->dev), dev->id);
return dev;
-sysfs_err:
- ocrdma_remove_sysfiles(dev);
alloc_err:
ocrdma_free_resources(dev);
ocrdma_cleanup_hw(dev);
@@ -376,7 +367,6 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
* of the registered clients.
*/
cancel_delayed_work_sync(&dev->eqd_work);
- ocrdma_remove_sysfiles(dev);
ib_unregister_device(&dev->ibdev);
ocrdma_rem_port_stats(dev);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 24d20a4aa262..290d776edf48 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -764,7 +764,8 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
return;
/* Create post stats base dir */
- dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+ dev->dir =
+ debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir);
if (!dev->dir)
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c158ca9fde6d..06d2a7f3304c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1480,8 +1480,7 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_qps = old_qps;
spin_unlock_irqrestore(&qp->q_lock, flags);
- if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
__func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index a0af6d424aed..8d6ff9df49fe 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -133,6 +133,33 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+/* QEDR sysfs interface */
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qedr_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *qedr_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group qedr_attr_group = {
+ .attrs = qedr_attributes,
+};
+
static int qedr_iw_register_device(struct qedr_dev *dev)
{
dev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -170,8 +197,6 @@ static int qedr_register_device(struct qedr_dev *dev)
{
int rc;
- strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
-
dev->ibdev.node_guid = dev->attr.node_guid;
memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
dev->ibdev.owner = THIS_MODULE;
@@ -262,9 +287,9 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
-
+ rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
- return ib_register_device(&dev->ibdev, NULL);
+ return ib_register_device(&dev->ibdev, "qedr%d", NULL);
}
/* This function allocates fast-path status block memory */
@@ -404,37 +429,6 @@ err1:
return rc;
}
-/* QEDR sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct qedr_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *qedr_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
-static void qedr_remove_sysfiles(struct qedr_dev *dev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
- device_remove_file(&dev->ibdev.dev, qedr_attributes[i]);
-}
-
static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
{
int rc = pci_enable_atomic_ops_to_root(pdev,
@@ -855,7 +849,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
{
struct qed_dev_rdma_info dev_info;
struct qedr_dev *dev;
- int rc = 0, i;
+ int rc = 0;
dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
if (!dev) {
@@ -914,18 +908,12 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
goto reg_err;
}
- for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
- if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
- goto sysfs_err;
-
if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
return dev;
-sysfs_err:
- ib_unregister_device(&dev->ibdev);
reg_err:
qedr_sync_free_irqs(dev);
irq_err:
@@ -944,7 +932,6 @@ static void qedr_remove(struct qedr_dev *dev)
/* First unregister with stack to stop all the active traffic
* of the registered clients.
*/
- qedr_remove_sysfiles(dev);
ib_unregister_device(&dev->ibdev);
qedr_stop_hw(dev);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index a2d708dceb8d..53bbe6b4e6e6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -43,7 +43,7 @@
#include "qedr_hsi_rdma.h"
#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
-#define DP_NAME(dev) ((dev)->ibdev.name)
+#define DP_NAME(_dev) dev_name(&(_dev)->ibdev.dev)
#define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP)
#define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE)
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 85578887421b..e1ac2fd60bb1 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -519,9 +519,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
}
if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
- packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB;
+ packet->tx_dest = QED_LL2_TX_DEST_LB;
else
- packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
+ packet->tx_dest = QED_LL2_TX_DEST_NW;
packet->roce_mode = roce_mode;
memcpy(packet->header.vaddr, ud_header_buffer, header_size);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 8cc3df24e04e..82ee4b4a7084 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1447,7 +1447,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
u64 pbl_base_addr, phy_prod_pair_addr;
struct ib_ucontext *ib_ctx = NULL;
struct qedr_srq_hwq_info *hw_srq;
- struct qedr_ucontext *ctx = NULL;
u32 page_cnt, page_size;
struct qedr_srq *srq;
int rc = 0;
@@ -1473,7 +1472,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
if (udata && ibpd->uobject && ibpd->uobject->context) {
ib_ctx = ibpd->uobject->context;
- ctx = get_qedr_ucontext(ib_ctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
DP_ERR(dev,
@@ -2240,8 +2238,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (rdma_protocol_roce(&dev->ibdev, 1)) {
if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
- ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ ibqp->qp_type, attr_mask)) {
DP_ERR(dev,
"modify qp: invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 3461df002f81..83d2349188db 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1390,13 +1390,13 @@ static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd)
*/
extern const char ib_qib_version[];
+extern const struct attribute_group qib_attr_group;
int qib_device_create(struct qib_devdata *);
void qib_device_remove(struct qib_devdata *);
int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
struct kobject *kobj);
-int qib_verbs_register_sysfs(struct qib_devdata *);
void qib_verbs_unregister_sysfs(struct qib_devdata *);
/* Hook for sysfs read of QSFP */
extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len);
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 344e401915f7..a81905df2d0f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
* qib_check_send_wqe - validate wr/wqe
* @qp - The qp
* @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled
*
- * validate wr/wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
- *
- * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ * Returns 0 on success, -EINVAL on failure
*/
int qib_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+ struct rvt_swqe *wqe, bool *call_send)
{
struct rvt_ah *ah;
- int ret = 0;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp,
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
/* progress hint */
- ret = 1;
+ *call_send = true;
break;
default:
break;
}
- return ret;
+ return 0;
}
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index f35fdeb14347..6fa002940451 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -254,7 +254,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done;
@@ -838,7 +838,7 @@ void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
qib_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
- qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
} else /* XXX need to handle delayed completion */
@@ -1221,7 +1221,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
- qib_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1425,7 +1425,8 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1471,7 +1472,8 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void) do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1490,7 +1492,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR;
ack_err:
if (qp->s_last == qp->s_acked) {
- qib_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
@@ -1844,7 +1846,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -1890,7 +1892,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index f8a7de795beb..1fa21938f310 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -171,307 +171,6 @@ err:
}
/**
- * qib_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the sending QP
- *
- * This is called from qib_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send(). We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void qib_ruc_loopback(struct rvt_qp *sqp)
-{
- struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- struct qib_devdata *dd = ppd->dd;
- struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- struct rvt_qp *qp;
- struct rvt_swqe *wqe;
- struct rvt_sge *sge;
- unsigned long flags;
- struct ib_wc wc;
- u64 sdata;
- atomic64_t *maddr;
- enum ib_wc_status send_status;
- int release;
- int ret;
-
- rcu_read_lock();
- /*
- * Note that we check the responder QP state after
- * checking the requester's state.
- */
- qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
- if (!qp)
- goto done;
-
- spin_lock_irqsave(&sqp->s_lock, flags);
-
- /* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
- !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- goto unlock;
-
- sqp->s_flags |= RVT_S_BUSY;
-
-again:
- if (sqp->s_last == READ_ONCE(sqp->s_head))
- goto clr_busy;
- wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
- /* Return if it is not OK to start a new work reqeust. */
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
- goto clr_busy;
- /* We are in the error state, flush the work request. */
- send_status = IB_WC_WR_FLUSH_ERR;
- goto flush_send;
- }
-
- /*
- * We can rely on the entry not changing without the s_lock
- * being held until we update s_last.
- * We increment s_cur to indicate s_last is in progress.
- */
- if (sqp->s_last == sqp->s_cur) {
- if (++sqp->s_cur >= sqp->s_size)
- sqp->s_cur = 0;
- }
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-
- if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
- qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->rvp.n_pkt_drops++;
- /*
- * For RC, the requester would timeout and retry so
- * shortcut the timeouts and just signal too many retries.
- */
- if (sqp->ibqp.qp_type == IB_QPT_RC)
- send_status = IB_WC_RETRY_EXC_ERR;
- else
- send_status = IB_WC_SUCCESS;
- goto serr;
- }
-
- memset(&wc, 0, sizeof(wc));
- send_status = IB_WC_SUCCESS;
-
- release = 1;
- sqp->s_sge.sge = wqe->sg_list[0];
- sqp->s_sge.sg_list = wqe->sg_list + 1;
- sqp->s_sge.num_sge = wqe->wr.num_sge;
- sqp->s_len = wqe->length;
- switch (wqe->wr.opcode) {
- case IB_WR_SEND_WITH_IMM:
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- /* FALLTHROUGH */
- case IB_WR_SEND:
- ret = rvt_get_rwqe(qp, false);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- break;
-
- case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = rvt_get_rwqe(qp, true);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- /* FALLTHROUGH */
- case IB_WR_RDMA_WRITE:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- if (wqe->length == 0)
- break;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_WRITE)))
- goto acc_err;
- qp->r_sge.sg_list = NULL;
- qp->r_sge.num_sge = 1;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
- release = 0;
- sqp->s_sge.sg_list = NULL;
- sqp->s_sge.num_sge = 1;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->wr.num_sge;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->atomic_wr.remote_addr,
- wqe->atomic_wr.rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
- goto acc_err;
- /* Perform atomic OP and save result. */
- maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->atomic_wr.compare_add;
- *(u64 *) sqp->s_sge.sge.vaddr =
- (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
- (u64) atomic64_add_return(sdata, maddr) - sdata :
- (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->atomic_wr.swap);
- rvt_put_mr(qp->r_sge.sge.mr);
- qp->r_sge.num_sge = 0;
- goto send_comp;
-
- default:
- send_status = IB_WC_LOC_QP_OP_ERR;
- goto serr;
- }
-
- sge = &sqp->s_sge.sge;
- while (sqp->s_len) {
- u32 len = sqp->s_len;
-
- if (len > sge->length)
- len = sge->length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
- qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (!release)
- rvt_put_mr(sge->mr);
- if (--sqp->s_sge.num_sge)
- *sge = *sqp->s_sge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- sqp->s_len -= len;
- }
- if (release)
- rvt_put_ss(&qp->r_sge);
-
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
- goto send_comp;
-
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- else
- wc.opcode = IB_WC_RECV;
- wc.wr_id = qp->r_wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
- wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
- wc.port_num = 1;
- /* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
- spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->rvp.n_loop_pkts++;
-flush_send:
- sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- qib_send_complete(sqp, wqe, send_status);
- goto again;
-
-rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- ibp->rvp.n_rnr_naks++;
- /*
- * Note: we don't need the s_lock held since the BUSY flag
- * makes this single threaded.
- */
- if (sqp->s_rnr_retry == 0) {
- send_status = IB_WC_RNR_RETRY_EXC_ERR;
- goto serr;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
- goto clr_busy;
- rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
- IB_AETH_CREDIT_SHIFT);
- goto clr_busy;
-
-op_err:
- send_status = IB_WC_REM_OP_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-inv_err:
- send_status = IB_WC_REM_INV_REQ_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-acc_err:
- send_status = IB_WC_REM_ACCESS_ERR;
- wc.status = IB_WC_LOC_PROT_ERR;
-err:
- /* responder goes to error state */
- rvt_rc_error(qp, wc.status);
-
-serr:
- spin_lock_irqsave(&sqp->s_lock, flags);
- qib_send_complete(sqp, wqe, send_status);
- if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
- sqp->s_flags &= ~RVT_S_BUSY;
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = sqp->ibqp.device;
- ev.element.qp = &sqp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
- }
- goto done;
- }
-clr_busy:
- sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
- rcu_read_unlock();
-}
-
-/**
* qib_make_grh - construct a GRH header
* @ibp: a pointer to the IB port
* @hdr: a pointer to the GRH header being constructed
@@ -573,7 +272,7 @@ void qib_do_send(struct rvt_qp *qp)
qp->ibqp.qp_type == IB_QPT_UC) &&
(rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ppd->lmc) - 1)) == ppd->lid) {
- qib_ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
@@ -613,42 +312,3 @@ void qib_do_send(struct rvt_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
-
-/*
- * This should be called with s_lock held.
- */
-void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status)
-{
- u32 old_last, last;
-
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- return;
-
- last = qp->s_last;
- old_last = last;
- if (++last >= qp->s_size)
- last = 0;
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_swqe(wqe);
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
- rvt_qp_swqe_complete(qp,
- wqe,
- ib_qib_wc_opcode[wqe->wr.opcode],
- status);
-
- if (qp->s_acked == old_last)
- qp->s_acked = last;
- if (qp->s_cur == old_last)
- qp->s_cur = last;
- if (qp->s_tail == old_last)
- qp->s_tail = last;
- if (qp->state == IB_QPS_SQD && last == qp->s_cur)
- qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index d0723d4aef5c..757d4c9d713d 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -651,7 +651,7 @@ unmap:
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
rvt_error_qp(qp, IB_WC_GENERAL_ERR);
} else if (qp->s_wqe)
- qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock(&qp->s_lock);
spin_unlock(&qp->r_lock);
/* return zero to process the next send work request */
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index ca2638d8f35e..1cf4ca3f23e3 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -551,17 +551,18 @@ static struct kobj_type qib_diagc_ktype = {
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct qib_ibdev *dev =
container_of(device, struct qib_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -574,15 +575,18 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret;
}
+static DEVICE_ATTR_RO(hca_type);
+static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL);
-static ssize_t show_version(struct device *device,
+static ssize_t version_show(struct device *device,
struct device_attribute *attr, char *buf)
{
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
}
+static DEVICE_ATTR_RO(version);
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -592,9 +596,9 @@ static ssize_t show_boardversion(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
+static DEVICE_ATTR_RO(boardversion);
-
-static ssize_t show_localbus_info(struct device *device,
+static ssize_t localbus_info_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -604,9 +608,9 @@ static ssize_t show_localbus_info(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
}
+static DEVICE_ATTR_RO(localbus_info);
-
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -620,9 +624,10 @@ static ssize_t show_nctxts(struct device *device,
(dd->first_user_ctxt > dd->cfgctxts) ? 0 :
(dd->cfgctxts - dd->first_user_ctxt));
}
+static DEVICE_ATTR_RO(nctxts);
-static ssize_t show_nfreectxts(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t nfreectxts_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -631,8 +636,9 @@ static ssize_t show_nfreectxts(struct device *device,
/* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
+static DEVICE_ATTR_RO(nfreectxts);
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -644,8 +650,9 @@ static ssize_t show_serial(struct device *device,
strcat(buf, "\n");
return strlen(buf);
}
+static DEVICE_ATTR_RO(serial);
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
@@ -663,11 +670,12 @@ static ssize_t store_chip_reset(struct device *device,
bail:
return ret < 0 ? ret : count;
}
+static DEVICE_ATTR_WO(chip_reset);
/*
* Dump tempsense regs. in decimal, to ease shell-scripts.
*/
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -695,6 +703,7 @@ static ssize_t show_tempsense(struct device *device,
*(signed char *)(regvals + 7));
return ret;
}
+static DEVICE_ATTR_RO(tempsense);
/*
* end of per-unit (or driver, in some cases, but replicated
@@ -702,30 +711,23 @@ static ssize_t show_tempsense(struct device *device,
*/
/* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *qib_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
- &dev_attr_version,
- &dev_attr_nctxts,
- &dev_attr_nfreectxts,
- &dev_attr_serial,
- &dev_attr_boardversion,
- &dev_attr_tempsense,
- &dev_attr_localbus_info,
- &dev_attr_chip_reset,
+static struct attribute *qib_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_version.attr,
+ &dev_attr_nctxts.attr,
+ &dev_attr_nfreectxts.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_boardversion.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_localbus_info.attr,
+ &dev_attr_chip_reset.attr,
+ NULL,
+};
+
+const struct attribute_group qib_attr_group = {
+ .attrs = qib_attributes,
};
int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -827,27 +829,6 @@ bail:
}
/*
- * Register and create our files in /sys/class/infiniband.
- */
-int qib_verbs_register_sysfs(struct qib_devdata *dd)
-{
- struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
- int i, ret;
-
- for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
- ret = device_create_file(&dev->dev, qib_attributes[i]);
- if (ret)
- goto bail;
- }
-
- return 0;
-bail:
- for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i)
- device_remove_file(&dev->dev, qib_attributes[i]);
- return ret;
-}
-
-/*
* Unregister and remove our files in /sys/class/infiniband.
*/
void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 3e54bc11e0ae..30c70ad0f4bf 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -68,7 +68,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
@@ -359,7 +359,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- qib_copy_sge(&qp->r_sge, data, pmtu, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -385,7 +385,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- qib_copy_sge(&qp->r_sge, data, tlen, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
@@ -449,7 +449,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -479,7 +479,7 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
@@ -495,7 +495,7 @@ rdma_last:
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index f8d029a2390f..4d4c31ea4e2d 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -162,8 +162,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
qib_make_grh(ibp, &grh, grd, 0, 0);
- qib_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), 1);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -179,7 +179,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
BUG_ON(len == 0);
- qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -260,7 +260,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
@@ -304,7 +304,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
qib_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags);
*flags = tflags;
- qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
@@ -551,12 +551,13 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
goto drop;
}
if (has_grh) {
- qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
- sizeof(struct ib_grh), 1);
+ rvt_copy_sge(qp, &qp->r_sge, &hdr->u.l.grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
- qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 41babbc0db58..4b0f5761a646 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -131,27 +131,6 @@ const enum ib_wc_opcode ib_qib_wc_opcode[] = {
*/
__be64 ib_qib_sys_image_guid;
-/**
- * qib_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = rvt_get_sge_length(sge, length);
-
- WARN_ON_ONCE(len == 0);
- memcpy(sge->vaddr, data, len);
- rvt_update_sge(ss, len, release);
- data += len;
- length -= len;
- }
-}
-
/*
* Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the qib_sge_state to get the count.
@@ -752,7 +731,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
spin_lock(&qp->s_lock);
if (tx->wqe)
- qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct ib_header *hdr;
@@ -1025,7 +1004,7 @@ done:
}
if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
- qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1512,6 +1491,9 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
rdi->dparms.props.max_mcast_grp;
/* post send table */
dd->verbs_dev.rdi.post_parms = qib_post_parms;
+
+ /* opcode translation table */
+ dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
}
/**
@@ -1588,7 +1570,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
@@ -1631,6 +1613,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
+ dd->verbs_dev.rdi.dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY;
qib_fill_device_attr(dd);
@@ -1642,19 +1625,14 @@ int qib_register_ib_device(struct qib_devdata *dd)
i,
dd->rcd[ctxt]->pkeys);
}
+ rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
if (ret)
goto err_tx;
- ret = qib_verbs_register_sysfs(dd);
- if (ret)
- goto err_class;
-
return ret;
-err_class:
- rvt_unregister_device(&dd->verbs_dev.rdi);
err_tx:
while (!list_empty(&dev->txreq_free)) {
struct list_head *l = dev->txreq_free.next;
@@ -1716,14 +1694,14 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
* It is only used in post send, which doesn't hold
* the s_lock.
*/
-void _qib_schedule_send(struct rvt_qp *qp)
+bool _qib_schedule_send(struct rvt_qp *qp)
{
struct qib_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
struct qib_qp_priv *priv = qp->priv;
- queue_work(ppd->qib_wq, &priv->s_work);
+ return queue_work(ppd->qib_wq, &priv->s_work);
}
/**
@@ -1733,8 +1711,9 @@ void _qib_schedule_send(struct rvt_qp *qp)
* This schedules qp progress. The s_lock
* should be held.
*/
-void qib_schedule_send(struct rvt_qp *qp)
+bool qib_schedule_send(struct rvt_qp *qp)
{
if (qib_send_ok(qp))
- _qib_schedule_send(qp);
+ return _qib_schedule_send(qp);
+ return false;
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 666613eef88f..a4426c24b0d1 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -223,8 +223,8 @@ static inline int qib_send_ok(struct rvt_qp *qp)
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
-void _qib_schedule_send(struct rvt_qp *qp);
-void qib_schedule_send(struct rvt_qp *qp);
+bool _qib_schedule_send(struct rvt_qp *qp);
+bool qib_schedule_send(struct rvt_qp *qp);
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
{
@@ -292,9 +292,6 @@ void qib_put_txreq(struct qib_verbs_txreq *tx);
int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len);
-void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- int release);
-
void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
@@ -303,7 +300,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
-int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
@@ -333,9 +331,6 @@ void _qib_do_send(struct work_struct *work);
void qib_do_send(struct rvt_qp *qp);
-void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status);
-
void qib_send_rc_ack(struct rvt_qp *qp);
int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 92dc66cc2d50..a3115709fb03 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -165,6 +165,5 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
{
- if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
- debugfs_remove(qp_flow->dbgfs_dentry);
+ debugfs_remove(qp_flow->dbgfs_dentry);
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index f0538a460328..73bd00f8d2c8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -76,7 +76,7 @@ static LIST_HEAD(usnic_ib_ibdev_list);
static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
{
struct usnic_ib_vf *vf = obj;
- return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+ return scnprintf(buf, buf_sz, "PF: %s ", dev_name(&vf->pf->ib_dev.dev));
}
/* End callback dump funcs */
@@ -138,7 +138,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
netdev = us_ibdev->netdev;
switch (event) {
case NETDEV_REBOOT:
- usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("PF Reset on %s\n", dev_name(&us_ibdev->ib_dev.dev));
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_PORT_ERR;
ib_event.device = &us_ibdev->ib_dev;
@@ -151,7 +151,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
if (!us_ibdev->ufdev->link_up &&
netif_carrier_ok(netdev)) {
usnic_fwd_carrier_up(us_ibdev->ufdev);
- usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("Link UP on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
ib_event.event = IB_EVENT_PORT_ACTIVE;
ib_event.device = &us_ibdev->ib_dev;
ib_event.element.port_num = 1;
@@ -159,7 +160,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
} else if (us_ibdev->ufdev->link_up &&
!netif_carrier_ok(netdev)) {
usnic_fwd_carrier_down(us_ibdev->ufdev);
- usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("Link DOWN on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_PORT_ERR;
ib_event.device = &us_ibdev->ib_dev;
@@ -168,17 +170,17 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
} else {
usnic_dbg("Ignoring %s on %s\n",
netdev_cmd_to_name(event),
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
break;
case NETDEV_CHANGEADDR:
if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
sizeof(us_ibdev->ufdev->mac))) {
usnic_dbg("Ignoring addr change on %s\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
} else {
usnic_info(" %s old mac: %pM new mac: %pM\n",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
us_ibdev->ufdev->mac,
netdev->dev_addr);
usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
@@ -193,19 +195,19 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
case NETDEV_CHANGEMTU:
if (us_ibdev->ufdev->mtu != netdev->mtu) {
usnic_info("MTU Change on %s old: %u new: %u\n",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
us_ibdev->ufdev->mtu, netdev->mtu);
usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
} else {
usnic_dbg("Ignoring MTU change on %s\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
break;
default:
usnic_dbg("Ignoring event %s on %s",
netdev_cmd_to_name(event),
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
mutex_unlock(&us_ibdev->usdev_lock);
}
@@ -267,7 +269,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
default:
usnic_info("Ignoring event %s on %s",
netdev_cmd_to_name(event),
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
mutex_unlock(&us_ibdev->usdev_lock);
@@ -364,7 +366,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
us_ibdev->ib_dev.dev.parent = &dev->dev;
us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
- strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
us_ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -416,7 +417,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
- if (ib_register_device(&us_ibdev->ib_dev, NULL))
+ rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
+
+ if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL))
goto err_fwd_dealloc;
usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
@@ -437,9 +440,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
kref_init(&us_ibdev->vf_cnt);
usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
- us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
- us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
- us_ibdev->ufdev->mtu);
+ dev_name(&us_ibdev->ib_dev.dev),
+ netdev_name(us_ibdev->netdev), us_ibdev->ufdev->mac,
+ us_ibdev->ufdev->link_up, us_ibdev->ufdev->mtu);
return us_ibdev;
err_fwd_dealloc:
@@ -452,7 +455,7 @@ err_dealloc:
static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
{
- usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+ usnic_info("Unregistering %s\n", dev_name(&us_ibdev->ib_dev.dev));
usnic_ib_sysfs_unregister_usdev(us_ibdev);
usnic_fwd_dev_free(us_ibdev->ufdev);
ib_unregister_device(&us_ibdev->ib_dev);
@@ -591,7 +594,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
mutex_unlock(&pf->usdev_lock);
usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
- pf->ib_dev.name);
+ dev_name(&pf->ib_dev.dev));
usnic_ib_log_vf(vf);
return 0;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 4210ca14014d..a7e4b2ccfaf8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -46,9 +46,8 @@
#include "usnic_ib_sysfs.h"
#include "usnic_log.h"
-static ssize_t usnic_ib_show_board(struct device *device,
- struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev =
container_of(device, struct usnic_ib_dev, ib_dev.dev);
@@ -60,13 +59,13 @@ static ssize_t usnic_ib_show_board(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
}
+static DEVICE_ATTR_RO(board_id);
/*
* Report the configuration for this PF
*/
static ssize_t
-usnic_ib_show_config(struct device *device, struct device_attribute *attr,
- char *buf)
+config_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
char *ptr;
@@ -94,7 +93,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
n = scnprintf(ptr, left,
"%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
busname,
PCI_SLOT(us_ibdev->pdev->devfn),
PCI_FUNC(us_ibdev->pdev->devfn),
@@ -119,17 +118,17 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
UPDATE_PTR_LEFT(n, ptr, left);
} else {
n = scnprintf(ptr, left, "%s: no VFs\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
UPDATE_PTR_LEFT(n, ptr, left);
}
mutex_unlock(&us_ibdev->usdev_lock);
return ptr - buf;
}
+static DEVICE_ATTR_RO(config);
static ssize_t
-usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
- char *buf)
+iface_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
@@ -138,10 +137,10 @@ usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "%s\n",
netdev_name(us_ibdev->netdev));
}
+static DEVICE_ATTR_RO(iface);
static ssize_t
-usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
- char *buf)
+max_vf_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
@@ -150,10 +149,10 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "%u\n",
kref_read(&us_ibdev->vf_cnt));
}
+static DEVICE_ATTR_RO(max_vf);
static ssize_t
-usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
- char *buf)
+qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
int qp_per_vf;
@@ -165,10 +164,10 @@ usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE,
"%d\n", qp_per_vf);
}
+static DEVICE_ATTR_RO(qp_per_vf);
static ssize_t
-usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
- char *buf)
+cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
@@ -177,21 +176,20 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "%d\n",
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
}
+static DEVICE_ATTR_RO(cq_per_vf);
+
+static struct attribute *usnic_class_attributes[] = {
+ &dev_attr_board_id.attr,
+ &dev_attr_config.attr,
+ &dev_attr_iface.attr,
+ &dev_attr_max_vf.attr,
+ &dev_attr_qp_per_vf.attr,
+ &dev_attr_cq_per_vf.attr,
+ NULL
+};
-static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
-static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
-static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
-static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
-static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
-static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
-
-static struct device_attribute *usnic_class_attributes[] = {
- &dev_attr_board_id,
- &dev_attr_config,
- &dev_attr_iface,
- &dev_attr_max_vf,
- &dev_attr_qp_per_vf,
- &dev_attr_cq_per_vf,
+const struct attribute_group usnic_attr_group = {
+ .attrs = usnic_class_attributes,
};
struct qpn_attribute {
@@ -278,18 +276,6 @@ static struct kobj_type usnic_ib_qpn_type = {
int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
{
- int i;
- int err;
- for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
- err = device_create_file(&us_ibdev->ib_dev.dev,
- usnic_class_attributes[i]);
- if (err) {
- usnic_err("Failed to create device file %d for %s eith err %d",
- i, us_ibdev->ib_dev.name, err);
- return -EINVAL;
- }
- }
-
/* create kernel object for looking at individual QPs */
kobject_get(&us_ibdev->ib_dev.dev.kobj);
us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
@@ -304,12 +290,6 @@ int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
{
- int i;
- for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
- device_remove_file(&us_ibdev->ib_dev.dev,
- usnic_class_attributes[i]);
- }
-
kobject_put(us_ibdev->qpn_kobj);
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
index 3d98e16cfeaf..b1f064cec850 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -41,4 +41,6 @@ void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
+extern const struct attribute_group usnic_attr_group;
+
#endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9973ac893635..0b91ff36768a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -159,7 +159,8 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (err) {
- usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
+ usnic_err("Failed to copy udata for %s",
+ dev_name(&us_ibdev->ib_dev.dev));
return err;
}
@@ -197,7 +198,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
vnic = vf->vnic;
if (!usnic_vnic_check_room(vnic, res_spec)) {
usnic_dbg("Found used vnic %s from %s\n",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
pci_name(usnic_vnic_get_pdev(
vnic)));
qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
@@ -230,7 +231,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
spin_unlock(&vf->lock);
}
- usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("No free qp grp found on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
return ERR_PTR(-ENOMEM);
qp_grp_check:
@@ -471,7 +473,7 @@ struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
}
usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
- pd, context, ibdev->name);
+ pd, context, dev_name(&ibdev->dev));
return &pd->ibpd;
}
@@ -508,20 +510,20 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
if (err) {
usnic_err("%s: cannot copy udata for create_qp\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
return ERR_PTR(-EINVAL);
}
err = create_qp_validate_user_data(cmd);
if (err) {
usnic_err("%s: Failed to validate user data\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
return ERR_PTR(-EINVAL);
}
if (init_attr->qp_type != IB_QPT_UD) {
usnic_err("%s asked to make a non-UD QP: %d\n",
- us_ibdev->ib_dev.name, init_attr->qp_type);
+ dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index e0a95538c364..82dd810bc000 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -121,7 +121,7 @@ void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
spin_lock(&roce_bitmap_lock);
if (!port_num) {
- usnic_err("Unreserved unvalid port num 0 for %s\n",
+ usnic_err("Unreserved invalid port num 0 for %s\n",
usnic_transport_to_str(type));
goto out_roce_custom;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 9dd39daa602b..49275a548751 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -54,18 +54,6 @@ static struct workqueue_struct *usnic_uiom_wq;
((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
(void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
-static void usnic_uiom_reg_account(struct work_struct *work)
-{
- struct usnic_uiom_reg *umem = container_of(work,
- struct usnic_uiom_reg, work);
-
- down_write(&umem->mm->mmap_sem);
- umem->mm->locked_vm -= umem->diff;
- up_write(&umem->mm->mmap_sem);
- mmput(umem->mm);
- kfree(umem);
-}
-
static int usnic_uiom_dma_fault(struct iommu_domain *domain,
struct device *dev,
unsigned long iova, int flags,
@@ -99,8 +87,9 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
}
static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
- int dmasync, struct list_head *chunk_list)
+ int dmasync, struct usnic_uiom_reg *uiomr)
{
+ struct list_head *chunk_list = &uiomr->chunk_list;
struct page **page_list;
struct scatterlist *sg;
struct usnic_uiom_chunk *chunk;
@@ -114,6 +103,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
int flags;
dma_addr_t pa;
unsigned int gup_flags;
+ struct mm_struct *mm;
/*
* If the combination of the addr and size requested for this memory
@@ -136,7 +126,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
- down_write(&current->mm->mmap_sem);
+ uiomr->owning_mm = mm = current->mm;
+ down_write(&mm->mmap_sem);
locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -196,10 +187,12 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
out:
if (ret < 0)
usnic_uiom_put_pages(chunk_list, 0);
- else
- current->mm->pinned_vm = locked;
+ else {
+ mm->pinned_vm = locked;
+ mmgrab(uiomr->owning_mm);
+ }
- up_write(&current->mm->mmap_sem);
+ up_write(&mm->mmap_sem);
free_page((unsigned long) page_list);
return ret;
}
@@ -379,7 +372,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
uiomr->pd = pd;
err = usnic_uiom_get_pages(addr, size, writable, dmasync,
- &uiomr->chunk_list);
+ uiomr);
if (err) {
usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
vpn_start, vpn_last, err);
@@ -426,29 +419,39 @@ out_put_intervals:
out_put_pages:
usnic_uiom_put_pages(&uiomr->chunk_list, 0);
spin_unlock(&pd->lock);
+ mmdrop(uiomr->owning_mm);
out_free_uiomr:
kfree(uiomr);
return ERR_PTR(err);
}
-void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
- struct ib_ucontext *ucontext)
+static void __usnic_uiom_release_tail(struct usnic_uiom_reg *uiomr)
{
- struct task_struct *task;
- struct mm_struct *mm;
- unsigned long diff;
+ mmdrop(uiomr->owning_mm);
+ kfree(uiomr);
+}
- __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr)
+{
+ return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+}
- task = get_pid_task(ucontext->tgid, PIDTYPE_PID);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
+static void usnic_uiom_release_defer(struct work_struct *work)
+{
+ struct usnic_uiom_reg *uiomr =
+ container_of(work, struct usnic_uiom_reg, work);
- diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+ down_write(&uiomr->owning_mm->mmap_sem);
+ uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+ up_write(&uiomr->owning_mm->mmap_sem);
+
+ __usnic_uiom_release_tail(uiomr);
+}
+
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+ struct ib_ucontext *context)
+{
+ __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
/*
* We may be called with the mm's mmap_sem already held. This
@@ -456,25 +459,21 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
- * we defer the vm_locked accounting to the system workqueue.
+ * we defer the vm_locked accounting to a workqueue.
*/
- if (ucontext->closing) {
- if (!down_write_trylock(&mm->mmap_sem)) {
- INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
- uiomr->mm = mm;
- uiomr->diff = diff;
-
+ if (context->closing) {
+ if (!down_write_trylock(&uiomr->owning_mm->mmap_sem)) {
+ INIT_WORK(&uiomr->work, usnic_uiom_release_defer);
queue_work(usnic_uiom_wq, &uiomr->work);
return;
}
- } else
- down_write(&mm->mmap_sem);
+ } else {
+ down_write(&uiomr->owning_mm->mmap_sem);
+ }
+ uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+ up_write(&uiomr->owning_mm->mmap_sem);
- mm->pinned_vm -= diff;
- up_write(&mm->mmap_sem);
- mmput(mm);
-out:
- kfree(uiomr);
+ __usnic_uiom_release_tail(uiomr);
}
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 8c096acff123..b86a9731071b 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -71,8 +71,7 @@ struct usnic_uiom_reg {
int writable;
struct list_head chunk_list;
struct work_struct work;
- struct mm_struct *mm;
- unsigned long diff;
+ struct mm_struct *owning_mm;
};
struct usnic_uiom_chunk {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index a5719899f49a..398443f43dc3 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -65,32 +65,36 @@ static struct workqueue_struct *event_wq;
static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", PVRDMA_REV_ID);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *pvrdma_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL,
+};
-static struct device_attribute *pvrdma_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group pvrdma_attr_group = {
+ .attrs = pvrdma_class_attributes,
};
static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
@@ -160,9 +164,7 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
static int pvrdma_register_device(struct pvrdma_dev *dev)
{
int ret = -1;
- int i = 0;
- strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
dev->flags = 0;
@@ -266,24 +268,16 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
}
dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA;
spin_lock_init(&dev->srq_tbl_lock);
+ rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
- ret = ib_register_device(&dev->ib_dev, NULL);
+ ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL);
if (ret)
goto err_srq_free;
- for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
- ret = device_create_file(&dev->ib_dev.dev,
- pvrdma_class_attributes[i]);
- if (ret)
- goto err_class;
- }
-
dev->ib_active = true;
return 0;
-err_class:
- ib_unregister_device(&dev->ib_dev);
err_srq_free:
kfree(dev->srq_tbl);
err_qp_free:
@@ -735,7 +729,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
default:
dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
- event, dev->ib_dev.name);
+ event, dev_name(&dev->ib_dev.dev));
break;
}
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 60083c0363a5..cf22f57a9f0d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -499,7 +499,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
- attr_mask, IB_LINK_LAYER_ETHERNET)) {
+ attr_mask)) {
ret = -EINVAL;
goto out;
}
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 98e798007f75..7df896a18d38 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
- depends on 64BIT && ARCH_DMA_ADDR_T_64BIT
+ depends on X86_64 && ARCH_DMA_ADDR_T_64BIT
depends on PCI
select DMA_VIRT_OPS
---help---
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 5ce403c6cddb..1735deb1a9d4 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -118,6 +118,187 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
+/* platform specific: return the last level cache (llc) size, in KiB */
+static int rvt_wss_llc_size(void)
+{
+ /* assume that the boot CPU value is universal for all CPUs */
+ return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy */
+static void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+ /*
+ * Use the only available X64 cacheless copy. Add a __user cast
+ * to quiet sparse. The src agument is already in the kernel so
+ * there are no security issues. The extra fault recovery machinery
+ * is not invoked.
+ */
+ __copy_user_nocache(dst, (void __user *)src, n, 0);
+}
+
+void rvt_wss_exit(struct rvt_dev_info *rdi)
+{
+ struct rvt_wss *wss = rdi->wss;
+
+ if (!wss)
+ return;
+
+ /* coded to handle partially initialized and repeat callers */
+ kfree(wss->entries);
+ wss->entries = NULL;
+ kfree(rdi->wss);
+ rdi->wss = NULL;
+}
+
+/**
+ * rvt_wss_init - Init wss data structures
+ *
+ * Return: 0 on success
+ */
+int rvt_wss_init(struct rvt_dev_info *rdi)
+{
+ unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
+ unsigned int wss_threshold = rdi->dparms.wss_threshold;
+ unsigned int wss_clean_period = rdi->dparms.wss_clean_period;
+ long llc_size;
+ long llc_bits;
+ long table_size;
+ long table_bits;
+ struct rvt_wss *wss;
+ int node = rdi->dparms.node;
+
+ if (sge_copy_mode != RVT_SGE_COPY_ADAPTIVE) {
+ rdi->wss = NULL;
+ return 0;
+ }
+
+ rdi->wss = kzalloc_node(sizeof(*rdi->wss), GFP_KERNEL, node);
+ if (!rdi->wss)
+ return -ENOMEM;
+ wss = rdi->wss;
+
+ /* check for a valid percent range - default to 80 if none or invalid */
+ if (wss_threshold < 1 || wss_threshold > 100)
+ wss_threshold = 80;
+
+ /* reject a wildly large period */
+ if (wss_clean_period > 1000000)
+ wss_clean_period = 256;
+
+ /* reject a zero period */
+ if (wss_clean_period == 0)
+ wss_clean_period = 1;
+
+ /*
+ * Calculate the table size - the next power of 2 larger than the
+ * LLC size. LLC size is in KiB.
+ */
+ llc_size = rvt_wss_llc_size() * 1024;
+ table_size = roundup_pow_of_two(llc_size);
+
+ /* one bit per page in rounded up table */
+ llc_bits = llc_size / PAGE_SIZE;
+ table_bits = table_size / PAGE_SIZE;
+ wss->pages_mask = table_bits - 1;
+ wss->num_entries = table_bits / BITS_PER_LONG;
+
+ wss->threshold = (llc_bits * wss_threshold) / 100;
+ if (wss->threshold == 0)
+ wss->threshold = 1;
+
+ wss->clean_period = wss_clean_period;
+ atomic_set(&wss->clean_counter, wss_clean_period);
+
+ wss->entries = kcalloc_node(wss->num_entries, sizeof(*wss->entries),
+ GFP_KERNEL, node);
+ if (!wss->entries) {
+ rvt_wss_exit(rdi);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Advance the clean counter. When the clean period has expired,
+ * clean an entry.
+ *
+ * This is implemented in atomics to avoid locking. Because multiple
+ * variables are involved, it can be racy which can lead to slightly
+ * inaccurate information. Since this is only a heuristic, this is
+ * OK. Any innaccuracies will clean themselves out as the counter
+ * advances. That said, it is unlikely the entry clean operation will
+ * race - the next possible racer will not start until the next clean
+ * period.
+ *
+ * The clean counter is implemented as a decrement to zero. When zero
+ * is reached an entry is cleaned.
+ */
+static void wss_advance_clean_counter(struct rvt_wss *wss)
+{
+ int entry;
+ int weight;
+ unsigned long bits;
+
+ /* become the cleaner if we decrement the counter to zero */
+ if (atomic_dec_and_test(&wss->clean_counter)) {
+ /*
+ * Set, not add, the clean period. This avoids an issue
+ * where the counter could decrement below the clean period.
+ * Doing a set can result in lost decrements, slowing the
+ * clean advance. Since this a heuristic, this possible
+ * slowdown is OK.
+ *
+ * An alternative is to loop, advancing the counter by a
+ * clean period until the result is > 0. However, this could
+ * lead to several threads keeping another in the clean loop.
+ * This could be mitigated by limiting the number of times
+ * we stay in the loop.
+ */
+ atomic_set(&wss->clean_counter, wss->clean_period);
+
+ /*
+ * Uniquely grab the entry to clean and move to next.
+ * The current entry is always the lower bits of
+ * wss.clean_entry. The table size, wss.num_entries,
+ * is always a power-of-2.
+ */
+ entry = (atomic_inc_return(&wss->clean_entry) - 1)
+ & (wss->num_entries - 1);
+
+ /* clear the entry and count the bits */
+ bits = xchg(&wss->entries[entry], 0);
+ weight = hweight64((u64)bits);
+ /* only adjust the contended total count if needed */
+ if (weight)
+ atomic_sub(weight, &wss->total_count);
+ }
+}
+
+/*
+ * Insert the given address into the working set array.
+ */
+static void wss_insert(struct rvt_wss *wss, void *address)
+{
+ u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss->pages_mask;
+ u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
+ u32 nr = page & (BITS_PER_LONG - 1);
+
+ if (!test_and_set_bit(nr, &wss->entries[entry]))
+ atomic_inc(&wss->total_count);
+
+ wss_advance_clean_counter(wss);
+}
+
+/*
+ * Is the working set larger than the threshold?
+ */
+static inline bool wss_exceeds_threshold(struct rvt_wss *wss)
+{
+ return atomic_read(&wss->total_count) >= wss->threshold;
+}
+
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map)
{
@@ -1164,11 +1345,8 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int lastwqe = 0;
int mig = 0;
int pmtu = 0; /* for gcc warning only */
- enum rdma_link_layer link;
int opa_ah;
- link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
-
spin_lock_irq(&qp->r_lock);
spin_lock(&qp->s_hlock);
spin_lock(&qp->s_lock);
@@ -1179,7 +1357,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask, link))
+ attr_mask))
goto inval;
if (rdi->driver_f.check_modify_qp &&
@@ -1718,7 +1896,7 @@ static inline int rvt_qp_is_avail(
*/
static int rvt_post_one_wr(struct rvt_qp *qp,
const struct ib_send_wr *wr,
- int *call_send)
+ bool *call_send)
{
struct rvt_swqe *wqe;
u32 next;
@@ -1823,15 +2001,11 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe->wr.num_sge = j;
}
- /* general part of wqe valid - allow for driver checks */
- if (rdi->driver_f.check_send_wqe) {
- ret = rdi->driver_f.check_send_wqe(qp, wqe);
- if (ret < 0)
- goto bail_inval_free;
- if (ret)
- *call_send = ret;
- }
-
+ /*
+ * Calculate and set SWQE PSN values prior to handing it off
+ * to the driver's check routine. This give the driver the
+ * opportunity to adjust PSN values based on internal checks.
+ */
log_pmtu = qp->log_pmtu;
if (qp->ibqp.qp_type != IB_QPT_UC &&
qp->ibqp.qp_type != IB_QPT_RC) {
@@ -1856,8 +2030,18 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
(wqe->length ?
((wqe->length - 1) >> log_pmtu) :
0);
- qp->s_next_psn = wqe->lpsn + 1;
}
+
+ /* general part of wqe valid - allow for driver checks */
+ if (rdi->driver_f.setup_wqe) {
+ ret = rdi->driver_f.setup_wqe(qp, wqe, call_send);
+ if (ret < 0)
+ goto bail_inval_free_ref;
+ }
+
+ if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL))
+ qp->s_next_psn = wqe->lpsn + 1;
+
if (unlikely(reserved_op)) {
wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
rvt_qp_wqe_reserve(qp, wqe);
@@ -1871,6 +2055,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
return 0;
+bail_inval_free_ref:
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
bail_inval_free:
/* release mr holds */
while (j) {
@@ -1897,7 +2085,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
unsigned long flags = 0;
- int call_send;
+ bool call_send;
unsigned nreq = 0;
int err = 0;
@@ -1930,7 +2118,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
bail:
spin_unlock_irqrestore(&qp->s_hlock, flags);
if (nreq) {
- if (call_send)
+ /*
+ * Only call do_send if there is exactly one packet, and the
+ * driver said it was ok.
+ */
+ if (nreq == 1 && call_send)
rdi->driver_f.do_send(qp);
else
rdi->driver_f.schedule_send_no_lock(qp);
@@ -2465,3 +2657,454 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
rcu_read_unlock();
}
EXPORT_SYMBOL(rvt_qp_iter);
+
+/*
+ * This should be called with s_lock held.
+ */
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status)
+{
+ u32 old_last, last;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ return;
+
+ last = qp->s_last;
+ old_last = last;
+ trace_rvt_qp_send_completion(qp, wqe, last);
+ if (++last >= qp->s_size)
+ last = 0;
+ trace_rvt_qp_send_completion(qp, wqe, last);
+ qp->s_last = last;
+ /* See post_send() */
+ barrier();
+ rvt_put_swqe(wqe);
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ rdi->wc_opcode[wqe->wr.opcode],
+ status);
+
+ if (qp->s_acked == old_last)
+ qp->s_acked = last;
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
+}
+EXPORT_SYMBOL(rvt_send_complete);
+
+/**
+ * rvt_copy_sge - copy data to SGE memory
+ * @qp: associated QP
+ * @ss: the SGE state
+ * @data: the data to copy
+ * @length: the length of the data
+ * @release: boolean to release MR
+ * @copy_last: do a separate copy of the last 8 bytes
+ */
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+ void *data, u32 length,
+ bool release, bool copy_last)
+{
+ struct rvt_sge *sge = &ss->sge;
+ int i;
+ bool in_last = false;
+ bool cacheless_copy = false;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ struct rvt_wss *wss = rdi->wss;
+ unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
+
+ if (sge_copy_mode == RVT_SGE_COPY_CACHELESS) {
+ cacheless_copy = length >= PAGE_SIZE;
+ } else if (sge_copy_mode == RVT_SGE_COPY_ADAPTIVE) {
+ if (length >= PAGE_SIZE) {
+ /*
+ * NOTE: this *assumes*:
+ * o The first vaddr is the dest.
+ * o If multiple pages, then vaddr is sequential.
+ */
+ wss_insert(wss, sge->vaddr);
+ if (length >= (2 * PAGE_SIZE))
+ wss_insert(wss, (sge->vaddr + PAGE_SIZE));
+
+ cacheless_copy = wss_exceeds_threshold(wss);
+ } else {
+ wss_advance_clean_counter(wss);
+ }
+ }
+
+ if (copy_last) {
+ if (length > 8) {
+ length -= 8;
+ } else {
+ copy_last = false;
+ in_last = true;
+ }
+ }
+
+again:
+ while (length) {
+ u32 len = rvt_get_sge_length(sge, length);
+
+ WARN_ON_ONCE(len == 0);
+ if (unlikely(in_last)) {
+ /* enforce byte transfer ordering */
+ for (i = 0; i < len; i++)
+ ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+ } else if (cacheless_copy) {
+ cacheless_memcpy(sge->vaddr, data, len);
+ } else {
+ memcpy(sge->vaddr, data, len);
+ }
+ rvt_update_sge(ss, len, release);
+ data += len;
+ length -= len;
+ }
+
+ if (copy_last) {
+ copy_last = false;
+ in_last = true;
+ length = 8;
+ goto again;
+ }
+}
+EXPORT_SYMBOL(rvt_copy_sge);
+
+/**
+ * ruc_loopback - handle UC and RC loopback requests
+ * @sqp: the sending QP
+ *
+ * This is called from rvt_do_send() to forward a WQE addressed to the same HFI
+ * Note that although we are single threaded due to the send engine, we still
+ * have to protect against post_send(). We don't have to worry about
+ * receive interrupts since this is a connected protocol and all packets
+ * will pass through here.
+ */
+void rvt_ruc_loopback(struct rvt_qp *sqp)
+{
+ struct rvt_ibport *rvp = NULL;
+ struct rvt_dev_info *rdi = ib_to_rvt(sqp->ibqp.device);
+ struct rvt_qp *qp;
+ struct rvt_swqe *wqe;
+ struct rvt_sge *sge;
+ unsigned long flags;
+ struct ib_wc wc;
+ u64 sdata;
+ atomic64_t *maddr;
+ enum ib_wc_status send_status;
+ bool release;
+ int ret;
+ bool copy_last = false;
+ int local_ops = 0;
+
+ rcu_read_lock();
+ rvp = rdi->ports[sqp->port_num - 1];
+
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
+
+ qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), rvp,
+ sqp->remote_qpn);
+
+ spin_lock_irqsave(&sqp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+ !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
+
+ sqp->s_flags |= RVT_S_BUSY;
+
+again:
+ if (sqp->s_last == READ_ONCE(sqp->s_head))
+ goto clr_busy;
+ wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work request. */
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
+ }
+
+ /*
+ * We can rely on the entry not changing without the s_lock
+ * being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
+ */
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+
+ if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
+ qp->ibqp.qp_type != sqp->ibqp.qp_type) {
+ rvp->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof(wc));
+ send_status = IB_WC_SUCCESS;
+
+ release = true;
+ sqp->s_sge.sge = wqe->sg_list[0];
+ sqp->s_sge.sg_list = wqe->sg_list + 1;
+ sqp->s_sge.num_sge = wqe->wr.num_sge;
+ sqp->s_len = wqe->length;
+ switch (wqe->wr.opcode) {
+ case IB_WR_REG_MR:
+ goto send_comp;
+
+ case IB_WR_LOCAL_INV:
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+ if (rvt_invalidate_rkey(sqp,
+ wqe->wr.ex.invalidate_rkey))
+ send_status = IB_WC_LOC_PROT_ERR;
+ local_ops = 1;
+ }
+ goto send_comp;
+
+ case IB_WR_SEND_WITH_INV:
+ if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
+ wc.wc_flags = IB_WC_WITH_INVALIDATE;
+ wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
+ }
+ goto send;
+
+ case IB_WR_SEND_WITH_IMM:
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ /* FALLTHROUGH */
+ case IB_WR_SEND:
+send:
+ ret = rvt_get_rwqe(qp, false);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ break;
+
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ ret = rvt_get_rwqe(qp, true);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ /* skip copy_last set and qp_access_flags recheck */
+ goto do_write;
+ case IB_WR_RDMA_WRITE:
+ copy_last = rvt_is_user_qp(qp);
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+do_write:
+ if (wqe->length == 0)
+ break;
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
+ qp->r_sge.sg_list = NULL;
+ qp->r_sge.num_sge = 1;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_RDMA_READ:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
+ if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
+ IB_ACCESS_REMOTE_READ)))
+ goto acc_err;
+ release = false;
+ sqp->s_sge.sg_list = NULL;
+ sqp->s_sge.num_sge = 1;
+ qp->r_sge.sge = wqe->sg_list[0];
+ qp->r_sge.sg_list = wqe->sg_list + 1;
+ qp->r_sge.num_sge = wqe->wr.num_sge;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto acc_err;
+ /* Perform atomic OP and save result. */
+ maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
+ sdata = wqe->atomic_wr.compare_add;
+ *(u64 *)sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64)atomic64_add_return(sdata, maddr) - sdata :
+ (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
+ sdata, wqe->atomic_wr.swap);
+ rvt_put_mr(qp->r_sge.sge.mr);
+ qp->r_sge.num_sge = 0;
+ goto send_comp;
+
+ default:
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
+ }
+
+ sge = &sqp->s_sge.sge;
+ while (sqp->s_len) {
+ u32 len = sqp->s_len;
+
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ WARN_ON_ONCE(len == 0);
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr,
+ len, release, copy_last);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (!release)
+ rvt_put_mr(sge->mr);
+ if (--sqp->s_sge.num_sge)
+ *sge = *sqp->s_sge.sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= RVT_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ sqp->s_len -= len;
+ }
+ if (release)
+ rvt_put_ss(&qp->r_sge);
+
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+ goto send_comp;
+
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
+ wc.port_num = 1;
+ /* Signal completion event if the solicited bit is set. */
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ wqe->wr.send_flags & IB_SEND_SOLICITED);
+
+send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ rvp->n_loop_pkts++;
+flush_send:
+ sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+ rvt_send_complete(sqp, wqe, send_status);
+ if (local_ops) {
+ atomic_dec(&sqp->local_ops_pending);
+ local_ops = 0;
+ }
+ goto again;
+
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ rvp->n_rnr_naks++;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
+ }
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
+ goto clr_busy;
+ rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
+ IB_AETH_CREDIT_SHIFT);
+ goto clr_busy;
+
+op_err:
+ send_status = IB_WC_REM_OP_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ rvt_rc_error(qp, wc.status);
+
+serr:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ rvt_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~RVT_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~RVT_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+done:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(rvt_ruc_loopback);
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 264811fdc530..6d883972e0b8 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -66,4 +66,6 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
+int rvt_wss_init(struct rvt_dev_info *rdi);
+void rvt_wss_exit(struct rvt_dev_info *rdi);
#endif /* DEF_RVTQP_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index 0ef25fc49f25..d5df352eadb1 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -153,6 +153,48 @@ TRACE_EVENT(
)
);
+TRACE_EVENT(
+ rvt_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
#endif /* __RVT_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 17e4abc067af..723d3daf2eba 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -774,6 +774,13 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
goto bail_no_mr;
}
+ /* Memory Working Set Size */
+ ret = rvt_wss_init(rdi);
+ if (ret) {
+ rvt_pr_err(rdi, "Error in WSS init.\n");
+ goto bail_mr;
+ }
+
/* Completion queues */
spin_lock_init(&rdi->n_cqs_lock);
@@ -828,10 +835,11 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
- ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
+ ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev),
+ rdi->driver_f.port_callback);
if (ret) {
rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
- goto bail_mr;
+ goto bail_wss;
}
rvt_create_mad_agents(rdi);
@@ -839,6 +847,8 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
rvt_pr_info(rdi, "Registration with rdmavt done.\n");
return ret;
+bail_wss:
+ rvt_wss_exit(rdi);
bail_mr:
rvt_mr_exit(rdi);
@@ -862,6 +872,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
rvt_free_mad_agents(rdi);
ib_unregister_device(&rdi->ibdev);
+ rvt_wss_exit(rdi);
rvt_mr_exit(rdi);
rvt_qp_exit(rdi);
}
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 10999fa69281..383e65c7bbc0 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -103,7 +103,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;
rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;
- rxe->attr.atomic_cap = RXE_ATOMIC_CAP;
+ rxe->attr.atomic_cap = IB_ATOMIC_HCA;
rxe->attr.max_ee = RXE_MAX_EE;
rxe->attr.max_rdd = RXE_MAX_RDD;
rxe->attr.max_mw = RXE_MAX_MW;
@@ -128,9 +128,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
/* initialize port attributes */
static int rxe_init_port_param(struct rxe_port *port)
{
- port->attr.state = RXE_PORT_STATE;
- port->attr.max_mtu = RXE_PORT_MAX_MTU;
- port->attr.active_mtu = RXE_PORT_ACTIVE_MTU;
+ port->attr.state = IB_PORT_DOWN;
+ port->attr.max_mtu = IB_MTU_4096;
+ port->attr.active_mtu = IB_MTU_256;
port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;
port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;
port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;
@@ -147,8 +147,7 @@ static int rxe_init_port_param(struct rxe_port *port)
port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;
port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;
port->attr.phys_state = RXE_PORT_PHYS_STATE;
- port->mtu_cap =
- ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
+ port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256);
port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
return 0;
@@ -300,7 +299,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
mtu = eth_mtu_int_to_enum(ndev_mtu);
/* Make sure that new MTU in range */
- mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;
+ mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256;
port->attr.active_mtu = mtu;
port->mtu_cap = ib_mtu_enum_to_int(mtu);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 83311dd07019..ea089cb091ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -191,6 +191,7 @@ static inline void reset_retry_counters(struct rxe_qp *qp)
{
qp->comp.retry_cnt = qp->attr.retry_cnt;
qp->comp.rnr_retry = qp->attr.rnr_retry;
+ qp->comp.started_retry = 0;
}
static inline enum comp_state check_psn(struct rxe_qp *qp,
@@ -253,6 +254,17 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
+ /* read retries of partial data may restart from
+ * read response first or response only.
+ */
+ if ((pkt->psn == wqe->first_psn &&
+ pkt->opcode ==
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) ||
+ (wqe->first_psn == wqe->last_psn &&
+ pkt->opcode ==
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY))
+ break;
+
return COMPST_ERROR;
}
break;
@@ -499,11 +511,11 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
- qp->comp.opcode = -1;
-
- if (pkt) {
- if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
- qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ if (pkt && wqe->state == wqe_state_pending) {
+ if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) {
+ qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK;
+ qp->comp.opcode = -1;
+ }
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
@@ -676,6 +688,20 @@ int rxe_completer(void *arg)
goto exit;
}
+ /* if we've started a retry, don't start another
+ * retry sequence, unless this is a timeout.
+ */
+ if (qp->comp.started_retry &&
+ !qp->comp.timeout_retry) {
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ skb = NULL;
+ }
+
+ goto done;
+ }
+
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
qp->comp.retry_cnt--;
@@ -692,6 +718,7 @@ int rxe_completer(void *arg)
rxe_counter_inc(rxe,
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
+ qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
@@ -701,7 +728,7 @@ int rxe_completer(void *arg)
skb = NULL;
}
- goto exit;
+ goto done;
} else {
rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 2ee4b08b00ea..a57276f2cb84 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -30,7 +30,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
+#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
@@ -97,7 +97,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context,
cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
if (err) {
- kvfree(cq->queue->buf);
+ vfree(cq->queue->buf);
kfree(cq->queue);
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 87d14f7ef21b..afd53f57a62b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -144,8 +144,7 @@ void rxe_loopback(struct sk_buff *skb);
int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
-int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc);
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc);
enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
struct device *rxe_dma_device(struct rxe_dev *rxe);
@@ -196,7 +195,7 @@ static inline int qp_mtu(struct rxe_qp *qp)
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
return qp->attr.path_mtu;
else
- return RXE_PORT_MAX_MTU;
+ return IB_MTU_4096;
}
static inline int rcv_wqe_size(int max_sge)
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index dff605fdf60f..9d3916b93f23 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -573,33 +573,20 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
int index = key >> 8;
- if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
- mem = rxe_pool_get_index(&rxe->mr_pool, index);
- if (!mem)
- goto err1;
- } else {
- goto err1;
+ mem = rxe_pool_get_index(&rxe->mr_pool, index);
+ if (!mem)
+ return NULL;
+
+ if (unlikely((type == lookup_local && mem->lkey != key) ||
+ (type == lookup_remote && mem->rkey != key) ||
+ mem->pd != pd ||
+ (access && !(access & mem->access)) ||
+ mem->state != RXE_MEM_STATE_VALID)) {
+ rxe_drop_ref(mem);
+ mem = NULL;
}
- if ((type == lookup_local && mem->lkey != key) ||
- (type == lookup_remote && mem->rkey != key))
- goto err2;
-
- if (mem->pd != pd)
- goto err2;
-
- if (access && !(access & mem->access))
- goto err2;
-
- if (mem->state != RXE_MEM_STATE_VALID)
- goto err2;
-
return mem;
-
-err2:
- rxe_drop_ref(mem);
-err1:
- return NULL;
}
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8094cbaa54a9..40e82e0f6c2d 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -72,7 +72,7 @@ struct rxe_dev *get_rxe_by_name(const char *name)
spin_lock_bh(&dev_list_lock);
list_for_each_entry(rxe, &rxe_dev_list, list) {
- if (!strcmp(name, rxe->ib_dev.name)) {
+ if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
found = rxe;
break;
}
@@ -182,19 +182,11 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif
-static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
+static struct dst_entry *rxe_find_route(struct net_device *ndev,
struct rxe_qp *qp,
struct rxe_av *av)
{
- const struct ib_gid_attr *attr;
struct dst_entry *dst = NULL;
- struct net_device *ndev;
-
- attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
- av->grh.sgid_index);
- if (IS_ERR(attr))
- return NULL;
- ndev = attr->ndev;
if (qp_type(qp) == IB_QPT_RC)
dst = sk_dst_get(qp->sk->sk);
@@ -229,7 +221,6 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
sk_dst_set(qp->sk->sk, dst);
}
}
- rdma_put_gid_attr(attr);
return dst;
}
@@ -377,8 +368,8 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}
-static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, struct rxe_av *av)
+static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+ struct rxe_av *av)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
@@ -387,7 +378,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route(rxe, qp, av);
+ dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -396,8 +387,8 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
if (!memcmp(saddr, daddr, sizeof(*daddr)))
pkt->mask |= RXE_LOOPBACK_MASK;
- prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
- htons(ROCE_V2_UDP_DPORT));
+ prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+ cpu_to_be16(ROCE_V2_UDP_DPORT));
prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
av->grh.traffic_class, av->grh.hop_limit, df, xnet);
@@ -406,15 +397,15 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return 0;
}
-static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, struct rxe_av *av)
+static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+ struct rxe_av *av)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route(rxe, qp, av);
+ dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -423,8 +414,8 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
if (!memcmp(saddr, daddr, sizeof(*daddr)))
pkt->mask |= RXE_LOOPBACK_MASK;
- prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
- htons(ROCE_V2_UDP_DPORT));
+ prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+ cpu_to_be16(ROCE_V2_UDP_DPORT));
prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
av->grh.traffic_class,
@@ -434,16 +425,15 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return 0;
}
-int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc)
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
{
int err = 0;
struct rxe_av *av = rxe_get_av(pkt);
if (av->network_type == RDMA_NETWORK_IPV4)
- err = prepare4(rxe, pkt, skb, av);
+ err = prepare4(pkt, skb, av);
else if (av->network_type == RDMA_NETWORK_IPV6)
- err = prepare6(rxe, pkt, skb, av);
+ err = prepare6(pkt, skb, av);
*crc = rxe_icrc_hdr(pkt, skb);
@@ -501,11 +491,6 @@ void rxe_loopback(struct sk_buff *skb)
rxe_rcv(skb);
}
-static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
-{
- return rxe->port.port_guid == av->grh.dgid.global.interface_id;
-}
-
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt)
{
@@ -625,7 +610,7 @@ void rxe_port_up(struct rxe_dev *rxe)
port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
- pr_info("set %s active\n", rxe->ib_dev.name);
+ dev_info(&rxe->ib_dev.dev, "set active\n");
}
/* Caller must hold net_info_lock */
@@ -638,7 +623,7 @@ void rxe_port_down(struct rxe_dev *rxe)
port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
- pr_info("set %s down\n", rxe->ib_dev.name);
+ dev_info(&rxe->ib_dev.dev, "set down\n");
}
static int rxe_notify(struct notifier_block *not_blk,
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 4555510d86c4..bdea899a58ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -90,7 +90,6 @@ enum rxe_device_param {
RXE_MAX_RES_RD_ATOM = 0x3f000,
RXE_MAX_QP_INIT_RD_ATOM = 128,
RXE_MAX_EE_INIT_RD_ATOM = 0,
- RXE_ATOMIC_CAP = 1,
RXE_MAX_EE = 0,
RXE_MAX_RDD = 0,
RXE_MAX_MW = 0,
@@ -139,9 +138,6 @@ enum rxe_device_param {
/* default/initial rxe port parameters */
enum rxe_port_param {
- RXE_PORT_STATE = IB_PORT_DOWN,
- RXE_PORT_MAX_MTU = IB_MTU_4096,
- RXE_PORT_ACTIVE_MTU = IB_MTU_256,
RXE_PORT_GID_TBL_LEN = 1024,
RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
RXE_PORT_MAX_MSG_SZ = 0x800000,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b4a8acc7bb7d..36b53fb94a49 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -207,7 +207,7 @@ int rxe_pool_init(
kref_init(&pool->ref_cnt);
- spin_lock_init(&pool->pool_lock);
+ rwlock_init(&pool->pool_lock);
if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
err = rxe_pool_init_index(pool,
@@ -222,7 +222,7 @@ int rxe_pool_init(
pool->key_size = rxe_type_info[type].key_size;
}
- pool->state = rxe_pool_valid;
+ pool->state = RXE_POOL_STATE_VALID;
out:
return err;
@@ -232,7 +232,7 @@ static void rxe_pool_release(struct kref *kref)
{
struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
- pool->state = rxe_pool_invalid;
+ pool->state = RXE_POOL_STATE_INVALID;
kfree(pool->table);
}
@@ -245,12 +245,12 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
{
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
- pool->state = rxe_pool_invalid;
+ write_lock_irqsave(&pool->pool_lock, flags);
+ pool->state = RXE_POOL_STATE_INVALID;
if (atomic_read(&pool->num_elem) > 0)
pr_warn("%s pool destroyed with unfree'd elem\n",
pool_name(pool));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
rxe_pool_put(pool);
@@ -336,10 +336,10 @@ void rxe_add_key(void *arg, void *key)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
insert_key(pool, elem);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_drop_key(void *arg)
@@ -348,9 +348,9 @@ void rxe_drop_key(void *arg)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
rb_erase(&elem->node, &pool->tree);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_add_index(void *arg)
@@ -359,10 +359,10 @@ void rxe_add_index(void *arg)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
elem->index = alloc_index(pool);
insert_index(pool, elem);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_drop_index(void *arg)
@@ -371,10 +371,10 @@ void rxe_drop_index(void *arg)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
clear_bit(elem->index - pool->min_index, pool->table);
rb_erase(&elem->node, &pool->tree);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void *rxe_alloc(struct rxe_pool *pool)
@@ -384,13 +384,13 @@ void *rxe_alloc(struct rxe_pool *pool)
might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
- spin_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
+ if (pool->state != RXE_POOL_STATE_VALID) {
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return NULL;
}
kref_get(&pool->ref_cnt);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
kref_get(&pool->rxe->ref_cnt);
@@ -436,9 +436,9 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
struct rxe_pool_entry *elem = NULL;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid)
+ if (pool->state != RXE_POOL_STATE_VALID)
goto out;
node = pool->tree.rb_node;
@@ -450,15 +450,14 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
node = node->rb_left;
else if (elem->index < index)
node = node->rb_right;
- else
+ else {
+ kref_get(&elem->ref_cnt);
break;
+ }
}
- if (node)
- kref_get(&elem->ref_cnt);
-
out:
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return node ? elem : NULL;
}
@@ -469,9 +468,9 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
int cmp;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid)
+ if (pool->state != RXE_POOL_STATE_VALID)
goto out;
node = pool->tree.rb_node;
@@ -494,6 +493,6 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
kref_get(&elem->ref_cnt);
out:
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return node ? elem : NULL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 47df28e43acf..aa4ba307097b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -74,8 +74,8 @@ struct rxe_type_info {
extern struct rxe_type_info rxe_type_info[];
enum rxe_pool_state {
- rxe_pool_invalid,
- rxe_pool_valid,
+ RXE_POOL_STATE_INVALID,
+ RXE_POOL_STATE_VALID,
};
struct rxe_pool_entry {
@@ -90,7 +90,7 @@ struct rxe_pool_entry {
struct rxe_pool {
struct rxe_dev *rxe;
- spinlock_t pool_lock; /* pool spinlock */
+ rwlock_t pool_lock; /* protects pool add/del/search */
size_t elem_size;
struct kref ref_cnt;
void (*cleanup)(struct rxe_pool_entry *obj);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index c58452daffc7..b9710907dac2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -34,6 +34,7 @@
#include <linux/skbuff.h>
#include <linux/delay.h>
#include <linux/sched.h>
+#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -227,6 +228,16 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
qp->sk->sk->sk_user_data = qp;
+ /* pick a source UDP port number for this QP based on
+ * the source QPN. this spreads traffic for different QPs
+ * across different NIC RX queues (while using a single
+ * flow for a given QP to maintain packet order).
+ * the port number must be in the Dynamic Ports range
+ * (0xc000 - 0xffff).
+ */
+ qp->src_port = RXE_ROCE_V2_SPORT +
+ (hash_32_generic(qp_num(qp), 14) & 0x3fff);
+
qp->sq.max_wr = init->cap.max_send_wr;
qp->sq.max_sge = init->cap.max_send_sge;
qp->sq.max_inline = init->cap.max_inline_data;
@@ -247,7 +258,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
&qp->sq.queue->ip);
if (err) {
- kvfree(qp->sq.queue->buf);
+ vfree(qp->sq.queue->buf);
kfree(qp->sq.queue);
return err;
}
@@ -300,7 +311,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->rq.queue->buf, qp->rq.queue->buf_size,
&qp->rq.queue->ip);
if (err) {
- kvfree(qp->rq.queue->buf);
+ vfree(qp->rq.queue->buf);
kfree(qp->rq.queue);
return err;
}
@@ -408,8 +419,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,
- IB_LINK_LAYER_ETHERNET)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
pr_warn("invalid mask or state for qp\n");
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index d30dbac24583..5c29a1bb575a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -122,7 +122,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
set_bad_pkey_cntr(port);
goto err1;
}
- } else if (qpn != 0) {
+ } else {
if (unlikely(!pkey_match(pkey,
port->pkey_tbl[qp->attr.pkey_index]
))) {
@@ -134,7 +134,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
}
if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
- qpn != 0 && pkt->mask) {
+ pkt->mask) {
u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
if (unlikely(deth_qkey(pkt) != qkey)) {
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 8be27238a86e..6c361d70d7cd 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -73,9 +73,6 @@ static void req_retry(struct rxe_qp *qp)
int npsn;
int first = 1;
- wqe = queue_head(qp->sq.queue);
- npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
-
qp->req.wqe_index = consumer_index(qp->sq.queue);
qp->req.psn = qp->comp.psn;
qp->req.opcode = -1;
@@ -107,11 +104,17 @@ static void req_retry(struct rxe_qp *qp)
if (first) {
first = 0;
- if (mask & WR_WRITE_OR_SEND_MASK)
+ if (mask & WR_WRITE_OR_SEND_MASK) {
+ npsn = (qp->comp.psn - wqe->first_psn) &
+ BTH_PSN_MASK;
retry_first_write_send(qp, wqe, mask, npsn);
+ }
- if (mask & WR_READ_MASK)
+ if (mask & WR_READ_MASK) {
+ npsn = (wqe->dma.length - wqe->dma.resid) /
+ qp->mtu;
wqe->iova += npsn * qp->mtu;
+ }
}
wqe->state = wqe_state_posted;
@@ -435,7 +438,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (pkt->mask & RXE_RETH_MASK) {
reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
reth_set_va(pkt, wqe->iova);
- reth_set_len(pkt, wqe->dma.length);
+ reth_set_len(pkt, wqe->dma.resid);
}
if (pkt->mask & RXE_IMMDT_MASK)
@@ -476,7 +479,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 *p;
int err;
- err = rxe_prepare(rxe, pkt, skb, &crc);
+ err = rxe_prepare(pkt, skb, &crc);
if (err)
return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index aa5833318372..c962160292f4 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -637,7 +637,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
if (ack->mask & RXE_ATMACK_MASK)
atmack_set_orig(ack, qp->resp.atomic_orig);
- err = rxe_prepare(rxe, ack, skb, &crc);
+ err = rxe_prepare(ack, skb, &crc);
if (err) {
kfree_skb(skb);
return NULL;
@@ -682,6 +682,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
rxe_advance_resp_resource(qp);
res->type = RXE_READ_MASK;
+ res->replay = 0;
res->read.va = qp->resp.va;
res->read.va_org = qp->resp.va;
@@ -752,7 +753,8 @@ static enum resp_states read_reply(struct rxe_qp *qp,
state = RESPST_DONE;
} else {
qp->resp.res = NULL;
- qp->resp.opcode = -1;
+ if (!res->replay)
+ qp->resp.opcode = -1;
if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
qp->resp.psn = res->cur_psn;
state = RESPST_CLEANUP;
@@ -814,6 +816,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
@@ -1065,7 +1068,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
enum resp_states rc;
- u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK;
+ u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
if (pkt->mask & RXE_SEND_MASK ||
pkt->mask & RXE_WRITE_MASK) {
@@ -1108,6 +1111,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
res->state = (pkt->psn == res->first_psn) ?
rdatm_res_state_new :
rdatm_res_state_replay;
+ res->replay = 1;
/* Reset the resource, except length. */
res->read.va_org = iova;
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 0d6c04ba7fc3..c41a5fee81f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
@@ -129,13 +130,18 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf,
q->buf_size, &q->ip);
- if (err)
+ if (err) {
+ vfree(q->buf);
+ kfree(q);
return err;
+ }
if (uresp) {
if (copy_to_user(&uresp->srq_num, &srq->srq_num,
- sizeof(uresp->srq_num)))
+ sizeof(uresp->srq_num))) {
+ rxe_queue_cleanup(q);
return -EFAULT;
+ }
}
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index d5ed7571128f..73a19f808e1b 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -105,7 +105,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
}
rxe_set_port_state(ndev);
- pr_info("added %s to %s\n", rxe->ib_dev.name, intf);
+ dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
err:
if (ndev)
dev_put(ndev);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f5b1e0ad6142..9c19f2027511 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1148,18 +1148,21 @@ static ssize_t parent_show(struct device *device,
static DEVICE_ATTR_RO(parent);
-static struct device_attribute *rxe_dev_attributes[] = {
- &dev_attr_parent,
+static struct attribute *rxe_dev_attributes[] = {
+ &dev_attr_parent.attr,
+ NULL
+};
+
+static const struct attribute_group rxe_attr_group = {
+ .attrs = rxe_dev_attributes,
};
int rxe_register_device(struct rxe_dev *rxe)
{
int err;
- int i;
struct ib_device *dev = &rxe->ib_dev;
struct crypto_shash *tfm;
- strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
dev->owner = THIS_MODULE;
@@ -1260,26 +1263,16 @@ int rxe_register_device(struct rxe_dev *rxe)
}
rxe->tfm = tfm;
+ rdma_set_device_sysfs_group(dev, &rxe_attr_group);
dev->driver_id = RDMA_DRIVER_RXE;
- err = ib_register_device(dev, NULL);
+ err = ib_register_device(dev, "rxe%d", NULL);
if (err) {
pr_warn("%s failed with error %d\n", __func__, err);
goto err1;
}
- for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
- err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
- if (err) {
- pr_warn("%s failed with error %d for attr number %d\n",
- __func__, err, i);
- goto err2;
- }
- }
-
return 0;
-err2:
- ib_unregister_device(dev);
err1:
crypto_free_shash(rxe->tfm);
@@ -1288,12 +1281,8 @@ err1:
int rxe_unregister_device(struct rxe_dev *rxe)
{
- int i;
struct ib_device *dev = &rxe->ib_dev;
- for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
- device_remove_file(&dev->dev, rxe_dev_attributes[i]);
-
ib_unregister_device(dev);
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index af1470d29391..82e670d6eeea 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -158,6 +158,7 @@ struct rxe_comp_info {
int opcode;
int timeout;
int timeout_retry;
+ int started_retry;
u32 retry_cnt;
u32 rnr_retry;
struct rxe_task task;
@@ -171,6 +172,7 @@ enum rdatm_res_state {
struct resp_res {
int type;
+ int replay;
u32 first_psn;
u32 last_psn;
u32 cur_psn;
@@ -195,6 +197,7 @@ struct rxe_resp_info {
enum rxe_qp_state state;
u32 msn;
u32 psn;
+ u32 ack_psn;
int opcode;
int drop_msg;
int goto_error;
@@ -248,6 +251,7 @@ struct rxe_qp {
struct socket *sk;
u32 dst_cookie;
+ u16 src_port;
struct rxe_av pri_av;
struct rxe_av alt_av;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index ea01b8dd2be6..0428e01e8f69 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1027,12 +1027,14 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
skb_queue_head_init(&skqueue);
+ netif_tx_lock_bh(p->dev);
spin_lock_irq(&priv->lock);
set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
if (p->neigh)
while ((skb = __skb_dequeue(&p->neigh->queue)))
__skb_queue_tail(&skqueue, skb);
spin_unlock_irq(&priv->lock);
+ netif_tx_unlock_bh(p->dev);
while ((skb = __skb_dequeue(&skqueue))) {
skb->dev = p->dev;
@@ -1436,11 +1438,15 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
- if (skb->protocol == htons(ETH_P_IP))
+ if (skb->protocol == htons(ETH_P_IP)) {
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ }
#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ }
#endif
dev_kfree_skb_any(skb);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e3d28f9ad9c0..1df90f0d9e64 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -243,7 +243,8 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
- if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
+ if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) ||
+ new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
return -EINVAL;
priv->admin_mtu = new_mtu;
@@ -1880,6 +1881,8 @@ static int ipoib_parent_init(struct net_device *ndev)
sizeof(union ib_gid));
SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
+ priv->dev->dev_port = priv->port - 1;
+ /* Let's set this one too for backwards compatibility. */
priv->dev->dev_id = priv->port - 1;
return 0;
@@ -2384,6 +2387,35 @@ int ipoib_add_pkey_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_pkey);
}
+/*
+ * We erroneously exposed the iface's port number in the dev_id
+ * sysfs field long after dev_port was introduced for that purpose[1],
+ * and we need to stop everyone from relying on that.
+ * Let's overload the shower routine for the dev_id file here
+ * to gently bring the issue up.
+ *
+ * [1] https://www.spinics.net/lists/netdev/msg272123.html
+ */
+static ssize_t dev_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *ndev = to_net_dev(dev);
+
+ if (ndev->dev_id == ndev->dev_port)
+ netdev_info_once(ndev,
+ "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n",
+ current->comm);
+
+ return sprintf(buf, "%#x\n", ndev->dev_id);
+}
+static DEVICE_ATTR_RO(dev_id);
+
+int ipoib_intercept_dev_id_attr(struct net_device *dev)
+{
+ device_remove_file(&dev->dev, &dev_attr_dev_id);
+ return device_create_file(&dev->dev, &dev_attr_dev_id);
+}
+
static struct net_device *ipoib_add_port(const char *format,
struct ib_device *hca, u8 port)
{
@@ -2425,6 +2457,8 @@ static struct net_device *ipoib_add_port(const char *format,
*/
ndev->priv_destructor = ipoib_intf_free;
+ if (ipoib_intercept_dev_id_attr(ndev))
+ goto sysfs_failed;
if (ipoib_cm_add_mode_attr(ndev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(ndev))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 9f36ca786df8..1e88213459f2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -277,7 +277,7 @@ void ipoib_event(struct ib_event_handler *handler,
return;
ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
- record->device->name, record->element.port_num);
+ dev_name(&record->device->dev), record->element.port_num);
if (record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) {
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2f6388596f88..96af06cfe0af 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -589,13 +589,19 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
ib_conn->post_recv_buf_count--;
}
-static inline void
+static inline int
iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
{
- if (likely(rkey == desc->rsc.mr->rkey))
+ if (likely(rkey == desc->rsc.mr->rkey)) {
desc->rsc.mr_valid = 0;
- else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+ } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) {
desc->pi_ctx->sig_mr_valid = 0;
+ } else {
+ iser_err("Bogus remote invalidation for rkey %#x\n", rkey);
+ return -EINVAL;
+ }
+
+ return 0;
}
static int
@@ -623,12 +629,14 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
if (iser_task->dir[ISER_DIR_IN]) {
desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
- iser_inv_desc(desc, rkey);
+ if (unlikely(iser_inv_desc(desc, rkey)))
+ return -EINVAL;
}
if (iser_task->dir[ISER_DIR_OUT]) {
desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
- iser_inv_desc(desc, rkey);
+ if (unlikely(iser_inv_desc(desc, rkey)))
+ return -EINVAL;
}
} else {
iser_err("failed to get task for itt=%d\n", hdr->itt);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index b686a4aaffe8..946b623ba5eb 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -55,7 +55,7 @@ static void iser_event_handler(struct ib_event_handler *handler,
{
iser_err("async event %s (%d) on device %s port %d\n",
ib_event_msg(event->event), event->event,
- event->device->name, event->element.port_num);
+ dev_name(&event->device->dev), event->element.port_num);
}
/**
@@ -85,7 +85,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
- device->comps_used, ib_dev->name,
+ device->comps_used, dev_name(&ib_dev->dev),
ib_dev->num_comp_vectors, max_cqe);
device->pd = ib_alloc_pd(ib_dev,
@@ -468,7 +468,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
iser_dbg("device %s supports max_send_wr %d\n",
- device->ib_device->name, ib_dev->attrs.max_qp_wr);
+ dev_name(&device->ib_device->dev),
+ ib_dev->attrs.max_qp_wr);
}
}
@@ -764,7 +765,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
IB_DEVICE_SIGNATURE_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
- ib_conn->device->ib_device->name);
+ dev_name(&ib_conn->device->ib_device->dev));
ib_conn->pi_support = false;
} else {
ib_conn->pi_support = true;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f39670c5c25c..e3dd13798d79 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -262,7 +262,7 @@ isert_alloc_comps(struct isert_device *device)
isert_info("Using %d CQs, %s supports %d vectors support "
"pi_capable %d\n",
- device->comps_used, device->ib_device->name,
+ device->comps_used, dev_name(&device->ib_device->dev),
device->ib_device->num_comp_vectors,
device->pi_capable);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index 267da8215e08..31cd361416ac 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -351,7 +351,8 @@ static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
if (unlikely(!dlid))
v_warn("Null dlid in MAC address\n");
} else if (def_port != OPA_VNIC_INVALID_PORT) {
- dlid = info->vesw.u_ucast_dlid[def_port];
+ if (def_port < OPA_VESW_MAX_NUM_DEF_PORT)
+ dlid = info->vesw.u_ucast_dlid[def_port];
}
}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 15711dcc6f58..d119d9afa845 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -888,7 +888,8 @@ static void opa_vnic_event(struct ib_event_handler *handler,
return;
c_dbg("OPA_VNIC received event %d on device %s port %d\n",
- record->event, record->device->name, record->element.port_num);
+ record->event, dev_name(&record->device->dev),
+ record->element.port_num);
if (record->event == IB_EVENT_PORT_ERR)
idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 444d16520506..36ade9ae7dde 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2951,7 +2951,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_rdma_ch *ch;
- int i;
+ int i, j;
u8 status;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
@@ -2965,8 +2965,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
for (i = 0; i < target->ch_count; i++) {
ch = &target->ch[i];
- for (i = 0; i < target->req_ring_size; ++i) {
- struct srp_request *req = &ch->req_ring[i];
+ for (j = 0; j < target->req_ring_size; ++j) {
+ struct srp_request *req = &ch->req_ring[j];
srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
}
@@ -3124,7 +3124,8 @@ static ssize_t show_local_ib_device(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
+ return sprintf(buf, "%s\n",
+ dev_name(&target->srp_host->srp_dev->dev->dev));
}
static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
@@ -3987,7 +3988,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
{
struct srp_host *host = container_of(dev, struct srp_host, dev);
- return sprintf(buf, "%s\n", host->srp_dev->dev->name);
+ return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -4019,7 +4020,8 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
host->dev.class = &srp_class;
host->dev.parent = device->dev->dev.parent;
- dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
+ dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
+ port);
if (device_register(&host->dev))
goto free_host;
@@ -4095,7 +4097,7 @@ static void srp_add_one(struct ib_device *device)
srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr;
pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
- device->name, mr_page_shift, attr->max_mr_size,
+ dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
attr->max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index f37cbad022a2..2357aa727dcf 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -148,7 +148,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
return;
pr_debug("ASYNC event= %d on device= %s\n", event->event,
- sdev->device->name);
+ dev_name(&sdev->device->dev));
switch (event->event) {
case IB_EVENT_PORT_ERR:
@@ -1941,7 +1941,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport)
if (srpt_disconnect_ch(ch) >= 0)
pr_info("Closing channel %s because target %s_%d has been disabled\n",
ch->sess_name,
- sport->sdev->device->name, sport->port);
+ dev_name(&sport->sdev->device->dev),
+ sport->port);
srpt_close_ch(ch);
}
}
@@ -2127,7 +2128,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
if (!sport->enabled) {
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n",
- sport->sdev->device->name, port_num);
+ dev_name(&sport->sdev->device->dev), port_num);
goto reject;
}
@@ -2267,7 +2268,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
rej->reason = cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n",
- sdev->device->name, port_num);
+ dev_name(&sdev->device->dev), port_num);
mutex_unlock(&sport->mutex);
goto reject;
}
@@ -2708,7 +2709,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
break;
}
- if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
+ if (WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))
return;
/* For read commands, transfer the data to the initiator. */
@@ -2842,7 +2843,7 @@ static int srpt_release_sport(struct srpt_port *sport)
while (wait_event_timeout(sport->ch_releaseQ,
srpt_ch_list_empty(sport), 5 * HZ) <= 0) {
pr_info("%s_%d: waiting for session unregistration ...\n",
- sport->sdev->device->name, sport->port);
+ dev_name(&sport->sdev->device->dev), sport->port);
rcu_read_lock();
list_for_each_entry(nexus, &sport->nexus_list, entry) {
list_for_each_entry(ch, &nexus->ch_list, list) {
@@ -2932,7 +2933,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
}
pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size,
- sdev->device->attrs.max_srq_wr, device->name);
+ sdev->device->attrs.max_srq_wr, dev_name(&device->dev));
sdev->ioctx_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
@@ -2965,8 +2966,8 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
} else if (use_srq && !sdev->srq) {
ret = srpt_alloc_srq(sdev);
}
- pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, device->name,
- sdev->use_srq, ret);
+ pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__,
+ dev_name(&device->dev), sdev->use_srq, ret);
return ret;
}
@@ -3052,7 +3053,7 @@ static void srpt_add_one(struct ib_device *device)
if (srpt_refresh_port(sport)) {
pr_err("MAD registration failed for %s-%d.\n",
- sdev->device->name, i);
+ dev_name(&sdev->device->dev), i);
goto err_event;
}
}
@@ -3063,7 +3064,7 @@ static void srpt_add_one(struct ib_device *device)
out:
ib_set_client_data(device, &srpt_client, sdev);
- pr_debug("added %s.\n", device->name);
+ pr_debug("added %s.\n", dev_name(&device->dev));
return;
err_event:
@@ -3078,7 +3079,7 @@ free_dev:
kfree(sdev);
err:
sdev = NULL;
- pr_info("%s(%s) failed.\n", __func__, device->name);
+ pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev));
goto out;
}
@@ -3093,7 +3094,8 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
int i;
if (!sdev) {
- pr_info("%s(%s): nothing to do.\n", __func__, device->name);
+ pr_info("%s(%s): nothing to do.\n", __func__,
+ dev_name(&device->dev));
return;
}
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 648eb6743ed5..6edffeed9953 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -238,10 +238,6 @@ static void mmc_mq_exit_request(struct blk_mq_tag_set *set, struct request *req,
mmc_exit_request(mq->queue, req);
}
-/*
- * We use BLK_MQ_F_BLOCKING and have only 1 hardware queue, which means requests
- * will not be dispatched in parallel.
- */
static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -264,7 +260,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_lock_irq(q->queue_lock);
- if (mq->recovery_needed) {
+ if (mq->recovery_needed || mq->busy) {
spin_unlock_irq(q->queue_lock);
return BLK_STS_RESOURCE;
}
@@ -291,6 +287,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
break;
}
+ /* Parallel dispatch of requests is not supported at the moment */
+ mq->busy = true;
+
mq->in_flight[issue_type] += 1;
get_card = (mmc_tot_in_flight(mq) == 1);
cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1);
@@ -333,9 +332,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
mq->in_flight[issue_type] -= 1;
if (mmc_tot_in_flight(mq) == 0)
put_card = true;
+ mq->busy = false;
spin_unlock_irq(q->queue_lock);
if (put_card)
mmc_put_card(card, &mq->ctx);
+ } else {
+ WRITE_ONCE(mq->busy, false);
}
return ret;
diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h
index 17e59d50b496..9bf3c9245075 100644
--- a/drivers/mmc/core/queue.h
+++ b/drivers/mmc/core/queue.h
@@ -81,6 +81,7 @@ struct mmc_queue {
unsigned int cqe_busy;
#define MMC_CQE_DCMD_BUSY BIT(0)
#define MMC_CQE_QUEUE_FULL BIT(1)
+ bool busy;
bool use_cqe;
bool recovery_needed;
bool in_recovery;
diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c
index 294de177632c..61e4e2a213c9 100644
--- a/drivers/mmc/host/android-goldfish.c
+++ b/drivers/mmc/host/android-goldfish.c
@@ -217,7 +217,7 @@ static void goldfish_mmc_xfer_done(struct goldfish_mmc_host *host,
* We don't really have DMA, so we need
* to copy from our platform driver buffer
*/
- sg_copy_to_buffer(data->sg, 1, host->virt_base,
+ sg_copy_from_buffer(data->sg, 1, host->virt_base,
data->sg->length);
}
host->data->bytes_xfered += data->sg->length;
@@ -393,7 +393,7 @@ static void goldfish_mmc_prepare_data(struct goldfish_mmc_host *host,
* We don't really have DMA, so we need to copy to our
* platform driver buffer
*/
- sg_copy_from_buffer(data->sg, 1, host->virt_base,
+ sg_copy_to_buffer(data->sg, 1, host->virt_base,
data->sg->length);
}
}
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 5aa2c9404e92..be53044086c7 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -1976,7 +1976,7 @@ static void atmci_read_data_pio(struct atmel_mci *host)
do {
value = atmci_readl(host, ATMCI_RDR);
if (likely(offset + 4 <= sg->length)) {
- sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset);
+ sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset);
offset += 4;
nbytes += 4;
@@ -1993,7 +1993,7 @@ static void atmci_read_data_pio(struct atmel_mci *host)
} else {
unsigned int remaining = sg->length - offset;
- sg_pcopy_to_buffer(sg, 1, &value, remaining, offset);
+ sg_pcopy_from_buffer(sg, 1, &value, remaining, offset);
nbytes += remaining;
flush_dcache_page(sg_page(sg));
@@ -2003,7 +2003,7 @@ static void atmci_read_data_pio(struct atmel_mci *host)
goto done;
offset = 4 - remaining;
- sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining,
+ sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining,
offset, 0);
nbytes += offset;
}
@@ -2042,7 +2042,7 @@ static void atmci_write_data_pio(struct atmel_mci *host)
do {
if (likely(offset + 4 <= sg->length)) {
- sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset);
+ sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset);
atmci_writel(host, ATMCI_TDR, value);
offset += 4;
@@ -2059,7 +2059,7 @@ static void atmci_write_data_pio(struct atmel_mci *host)
unsigned int remaining = sg->length - offset;
value = 0;
- sg_pcopy_from_buffer(sg, 1, &value, remaining, offset);
+ sg_pcopy_to_buffer(sg, 1, &value, remaining, offset);
nbytes += remaining;
host->sg = sg = sg_next(sg);
@@ -2070,7 +2070,7 @@ static void atmci_write_data_pio(struct atmel_mci *host)
}
offset = 4 - remaining;
- sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining,
+ sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining,
offset, 0);
atmci_writel(host, ATMCI_TDR, value);
nbytes += offset;
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index 35cc0de6be67..ca0b43973769 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -45,14 +45,16 @@
/* DM_CM_RST */
#define RST_DTRANRST1 BIT(9)
#define RST_DTRANRST0 BIT(8)
-#define RST_RESERVED_BITS GENMASK_ULL(32, 0)
+#define RST_RESERVED_BITS GENMASK_ULL(31, 0)
/* DM_CM_INFO1 and DM_CM_INFO1_MASK */
#define INFO1_CLEAR 0
+#define INFO1_MASK_CLEAR GENMASK_ULL(31, 0)
#define INFO1_DTRANEND1 BIT(17)
#define INFO1_DTRANEND0 BIT(16)
/* DM_CM_INFO2 and DM_CM_INFO2_MASK */
+#define INFO2_MASK_CLEAR GENMASK_ULL(31, 0)
#define INFO2_DTRANERR1 BIT(17)
#define INFO2_DTRANERR0 BIT(16)
@@ -252,6 +254,12 @@ renesas_sdhi_internal_dmac_request_dma(struct tmio_mmc_host *host,
{
struct renesas_sdhi *priv = host_to_priv(host);
+ /* Disable DMAC interrupts, we don't use them */
+ renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO1_MASK,
+ INFO1_MASK_CLEAR);
+ renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO2_MASK,
+ INFO2_MASK_CLEAR);
+
/* Each value is set to non-zero to assume "enabling" each DMA */
host->chan_rx = host->chan_tx = (void *)0xdeadbeaf;
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index ca18612c4201..67b2065e7a19 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -1338,6 +1338,11 @@ int denali_init(struct denali_nand_info *denali)
denali_enable_irq(denali);
denali_reset_banks(denali);
+ if (!denali->max_banks) {
+ /* Error out earlier if no chip is found for some reasons. */
+ ret = -ENODEV;
+ goto disable_irq;
+ }
denali->active_bank = DENALI_INVALID_BANK;
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index a3f04315c05c..427fcbc1b71c 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -1218,7 +1218,7 @@ static int docg4_resume(struct platform_device *pdev)
return 0;
}
-static void __init init_mtd_structs(struct mtd_info *mtd)
+static void init_mtd_structs(struct mtd_info *mtd)
{
/* initialize mtd and nand data structures */
@@ -1290,7 +1290,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
}
-static int __init read_id_reg(struct mtd_info *mtd)
+static int read_id_reg(struct mtd_info *mtd)
{
struct nand_chip *nand = mtd_to_nand(mtd);
struct docg4_priv *doc = nand_get_controller_data(nand);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 139d96c5a023..092c817f8f11 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -110,16 +110,14 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
struct tcf_exts *tc_exts)
{
const struct tc_action *tc_act;
- LIST_HEAD(tc_actions);
- int rc;
+ int i, rc;
if (!tcf_exts_has_actions(tc_exts)) {
netdev_info(bp->dev, "no actions");
return -EINVAL;
}
- tcf_exts_to_list(tc_exts, &tc_actions);
- list_for_each_entry(tc_act, &tc_actions, list) {
+ tcf_exts_for_each_action(i, tc_act, tc_exts) {
/* Drop action */
if (is_tcf_gact_shot(tc_act)) {
actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index dc09f9a8a49b..c6707ea2d751 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -482,11 +482,6 @@ static int macb_mii_probe(struct net_device *dev)
if (np) {
if (of_phy_is_fixed_link(np)) {
- if (of_phy_register_fixed_link(np) < 0) {
- dev_err(&bp->pdev->dev,
- "broken fixed-link specification\n");
- return -ENODEV;
- }
bp->phy_node = of_node_get(np);
} else {
bp->phy_node = of_parse_phandle(np, "phy-handle", 0);
@@ -569,7 +564,7 @@ static int macb_mii_init(struct macb *bp)
{
struct macb_platform_data *pdata;
struct device_node *np;
- int err;
+ int err = -ENXIO;
/* Enable management port */
macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -592,12 +587,23 @@ static int macb_mii_init(struct macb *bp)
dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
np = bp->pdev->dev.of_node;
- if (pdata)
- bp->mii_bus->phy_mask = pdata->phy_mask;
+ if (np && of_phy_is_fixed_link(np)) {
+ if (of_phy_register_fixed_link(np) < 0) {
+ dev_err(&bp->pdev->dev,
+ "broken fixed-link specification %pOF\n", np);
+ goto err_out_free_mdiobus;
+ }
+
+ err = mdiobus_register(bp->mii_bus);
+ } else {
+ if (pdata)
+ bp->mii_bus->phy_mask = pdata->phy_mask;
+
+ err = of_mdiobus_register(bp->mii_bus, np);
+ }
- err = of_mdiobus_register(bp->mii_bus, np);
if (err)
- goto err_out_free_mdiobus;
+ goto err_out_free_fixed_link;
err = macb_mii_probe(bp->dev);
if (err)
@@ -607,6 +613,7 @@ static int macb_mii_init(struct macb *bp)
err_out_unregister_bus:
mdiobus_unregister(bp->mii_bus);
+err_out_free_fixed_link:
if (np && of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
err_out_free_mdiobus:
@@ -2028,14 +2035,17 @@ static void macb_reset_hw(struct macb *bp)
{
struct macb_queue *queue;
unsigned int q;
+ u32 ctrl = macb_readl(bp, NCR);
/* Disable RX and TX (XXX: Should we halt the transmission
* more gracefully?)
*/
- macb_writel(bp, NCR, 0);
+ ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE));
/* Clear the stats registers (XXX: Update stats first?) */
- macb_writel(bp, NCR, MACB_BIT(CLRSTAT));
+ ctrl |= MACB_BIT(CLRSTAT);
+
+ macb_writel(bp, NCR, ctrl);
/* Clear all status flags */
macb_writel(bp, TSR, -1);
@@ -2223,7 +2233,7 @@ static void macb_init_hw(struct macb *bp)
}
/* Enable TX and RX */
- macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE));
+ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE));
}
/* The hash address register is 64 bits long and takes up two
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 623f73dd7738..c116f96956fe 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -417,10 +417,9 @@ static void cxgb4_process_flow_actions(struct net_device *in,
struct ch_filter_specification *fs)
{
const struct tc_action *a;
- LIST_HEAD(actions);
+ int i;
- tcf_exts_to_list(cls->exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, cls->exts) {
if (is_tcf_gact_ok(a)) {
fs->action = FILTER_PASS;
} else if (is_tcf_gact_shot(a)) {
@@ -591,10 +590,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev,
bool act_redir = false;
bool act_pedit = false;
bool act_vlan = false;
- LIST_HEAD(actions);
+ int i;
- tcf_exts_to_list(cls->exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, cls->exts) {
if (is_tcf_gact_ok(a)) {
/* Do nothing */
} else if (is_tcf_gact_shot(a)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index 18eb2aedd4cb..c7d2b4dc7568 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -93,14 +93,13 @@ static int fill_action_fields(struct adapter *adap,
unsigned int num_actions = 0;
const struct tc_action *a;
struct tcf_exts *exts;
- LIST_HEAD(actions);
+ int i;
exts = cls->knode.exts;
if (!tcf_exts_has_actions(exts))
return -EINVAL;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, exts) {
/* Don't allow more than one action per rule. */
if (num_actions)
return -EINVAL;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index b8f75a22fb6c..f152da1ce046 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -753,7 +753,6 @@ struct cpl_abort_req_rss {
};
struct cpl_abort_req_rss6 {
- WR_HDR;
union opcode_tid ot;
__be32 srqidx_status;
};
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index fa5b30f547f6..cad52bd331f7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -220,10 +220,10 @@ struct hnae_desc_cb {
/* priv data for the desc, e.g. skb when use with ip stack*/
void *priv;
- u16 page_offset;
- u16 reuse_flag;
+ u32 page_offset;
+ u32 length; /* length of the buffer */
- u16 length; /* length of the buffer */
+ u16 reuse_flag;
/* desc type, used by the ring user to mark the type of the priv data */
u16 type;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 9f2b552aee33..02a0ba20fad5 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -406,113 +406,13 @@ out_net_tx_busy:
return NETDEV_TX_BUSY;
}
-/**
- * hns_nic_get_headlen - determine size of header for RSC/LRO/GRO/FCOE
- * @data: pointer to the start of the headers
- * @max: total length of section to find headers in
- *
- * This function is meant to determine the length of headers that will
- * be recognized by hardware for LRO, GRO, and RSC offloads. The main
- * motivation of doing this is to only perform one pull for IPv4 TCP
- * packets so that we can do basic things like calculating the gso_size
- * based on the average data per packet.
- **/
-static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag,
- unsigned int max_size)
-{
- unsigned char *network;
- u8 hlen;
-
- /* this should never happen, but better safe than sorry */
- if (max_size < ETH_HLEN)
- return max_size;
-
- /* initialize network frame pointer */
- network = data;
-
- /* set first protocol and move network header forward */
- network += ETH_HLEN;
-
- /* handle any vlan tag if present */
- if (hnae_get_field(flag, HNS_RXD_VLAN_M, HNS_RXD_VLAN_S)
- == HNS_RX_FLAG_VLAN_PRESENT) {
- if ((typeof(max_size))(network - data) > (max_size - VLAN_HLEN))
- return max_size;
-
- network += VLAN_HLEN;
- }
-
- /* handle L3 protocols */
- if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S)
- == HNS_RX_FLAG_L3ID_IPV4) {
- if ((typeof(max_size))(network - data) >
- (max_size - sizeof(struct iphdr)))
- return max_size;
-
- /* access ihl as a u8 to avoid unaligned access on ia64 */
- hlen = (network[0] & 0x0F) << 2;
-
- /* verify hlen meets minimum size requirements */
- if (hlen < sizeof(struct iphdr))
- return network - data;
-
- /* record next protocol if header is present */
- } else if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S)
- == HNS_RX_FLAG_L3ID_IPV6) {
- if ((typeof(max_size))(network - data) >
- (max_size - sizeof(struct ipv6hdr)))
- return max_size;
-
- /* record next protocol */
- hlen = sizeof(struct ipv6hdr);
- } else {
- return network - data;
- }
-
- /* relocate pointer to start of L4 header */
- network += hlen;
-
- /* finally sort out TCP/UDP */
- if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S)
- == HNS_RX_FLAG_L4ID_TCP) {
- if ((typeof(max_size))(network - data) >
- (max_size - sizeof(struct tcphdr)))
- return max_size;
-
- /* access doff as a u8 to avoid unaligned access on ia64 */
- hlen = (network[12] & 0xF0) >> 2;
-
- /* verify hlen meets minimum size requirements */
- if (hlen < sizeof(struct tcphdr))
- return network - data;
-
- network += hlen;
- } else if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S)
- == HNS_RX_FLAG_L4ID_UDP) {
- if ((typeof(max_size))(network - data) >
- (max_size - sizeof(struct udphdr)))
- return max_size;
-
- network += sizeof(struct udphdr);
- }
-
- /* If everything has gone correctly network should be the
- * data section of the packet and will be the end of the header.
- * If not then it probably represents the end of the last recognized
- * header.
- */
- if ((typeof(max_size))(network - data) < max_size)
- return network - data;
- else
- return max_size;
-}
-
static void hns_nic_reuse_page(struct sk_buff *skb, int i,
struct hnae_ring *ring, int pull_len,
struct hnae_desc_cb *desc_cb)
{
struct hnae_desc *desc;
- int truesize, size;
+ u32 truesize;
+ int size;
int last_offset;
bool twobufs;
@@ -530,7 +430,7 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i,
}
skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
- size - pull_len, truesize - pull_len);
+ size - pull_len, truesize);
/* avoid re-using remote pages,flag default unreuse */
if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))
@@ -695,7 +595,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
} else {
ring->stats.seg_pkt_cnt++;
- pull_len = hns_nic_get_headlen(va, bnum_flag, HNS_RX_HEAD_SIZE);
+ pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE);
memcpy(__skb_put(skb, pull_len), va,
ALIGN(pull_len, sizeof(long)));
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 3554dca7a680..955c4ab18b03 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2019,7 +2019,8 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
struct hns3_desc_cb *desc_cb)
{
struct hns3_desc *desc;
- int truesize, size;
+ u32 truesize;
+ int size;
int last_offset;
bool twobufs;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index a02a96aee2a2..cb450d7ec8c1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -284,11 +284,11 @@ struct hns3_desc_cb {
/* priv data for the desc, e.g. skb when use with ip stack*/
void *priv;
- u16 page_offset;
- u16 reuse_flag;
-
+ u32 page_offset;
u32 length; /* length of the buffer */
+ u16 reuse_flag;
+
/* desc type, used by the ring user to mark the type of the priv data */
u16 type;
};
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index bdb3f8e65ed4..2569a168334c 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -624,14 +624,14 @@ static int e1000_set_ringparam(struct net_device *netdev,
adapter->tx_ring = tx_old;
e1000_free_all_rx_resources(adapter);
e1000_free_all_tx_resources(adapter);
- kfree(tx_old);
- kfree(rx_old);
adapter->rx_ring = rxdr;
adapter->tx_ring = txdr;
err = e1000_up(adapter);
if (err)
goto err_setup;
}
+ kfree(tx_old);
+ kfree(rx_old);
clear_bit(__E1000_RESETTING, &adapter->flags);
return 0;
@@ -644,7 +644,8 @@ err_setup_rx:
err_alloc_rx:
kfree(txdr);
err_alloc_tx:
- e1000_up(adapter);
+ if (netif_running(adapter->netdev))
+ e1000_up(adapter);
err_setup:
clear_bit(__E1000_RESETTING, &adapter->flags);
return err;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index abcd096ede14..5ff6caa83948 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2013,7 +2013,7 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
i40e_add_stat_strings(&data, i40e_gstrings_pfc_stats, i);
- WARN_ONCE(p - data != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN,
+ WARN_ONCE(data - p != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN,
"stat strings count mismatch!");
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f2c622e78802..ac685ad4d877 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -5122,15 +5122,17 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
u8 *bw_share)
{
struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+ struct i40e_pf *pf = vsi->back;
i40e_status ret;
int i;
- if (vsi->back->flags & I40E_FLAG_TC_MQPRIO)
+ /* There is no need to reset BW when mqprio mode is on. */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
return 0;
- if (!vsi->mqprio_qopt.qopt.hw) {
+ if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
if (ret)
- dev_info(&vsi->back->pdev->dev,
+ dev_info(&pf->pdev->dev,
"Failed to reset tx rate for vsi->seid %u\n",
vsi->seid);
return ret;
@@ -5139,12 +5141,11 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
bw_data.tc_bw_credits[i] = bw_share[i];
- ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, &bw_data,
- NULL);
+ ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL);
if (ret) {
- dev_info(&vsi->back->pdev->dev,
+ dev_info(&pf->pdev->dev,
"AQ command Config VSI BW allocation per TC failed = %d\n",
- vsi->back->hw.aq.asq_last_status);
+ pf->hw.aq.asq_last_status);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index d8b5fff581e7..868f4a1d0f72 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -89,6 +89,13 @@ extern const char ice_drv_ver[];
#define ice_for_each_rxq(vsi, i) \
for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
+/* Macros for each allocated tx/rx ring whether used or not in a VSI */
+#define ice_for_each_alloc_txq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)
+
+#define ice_for_each_alloc_rxq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
+
struct ice_tc_info {
u16 qoffset;
u16 qcount;
@@ -189,9 +196,9 @@ struct ice_vsi {
struct list_head tmp_sync_list; /* MAC filters to be synced */
struct list_head tmp_unsync_list; /* MAC filters to be unsynced */
- bool irqs_ready;
- bool current_isup; /* Sync 'link up' logging */
- bool stat_offsets_loaded;
+ u8 irqs_ready;
+ u8 current_isup; /* Sync 'link up' logging */
+ u8 stat_offsets_loaded;
/* queue information */
u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
@@ -262,7 +269,7 @@ struct ice_pf {
struct ice_hw_port_stats stats;
struct ice_hw_port_stats stats_prev;
struct ice_hw hw;
- bool stat_prev_loaded; /* has previous stats been loaded */
+ u8 stat_prev_loaded; /* has previous stats been loaded */
char int_name[ICE_INT_NAME_STR_LEN];
};
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 7541ec2270b3..a0614f472658 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -329,19 +329,19 @@ struct ice_aqc_vsi_props {
/* VLAN section */
__le16 pvid; /* VLANS include priority bits */
u8 pvlan_reserved[2];
- u8 port_vlan_flags;
-#define ICE_AQ_VSI_PVLAN_MODE_S 0
-#define ICE_AQ_VSI_PVLAN_MODE_M (0x3 << ICE_AQ_VSI_PVLAN_MODE_S)
-#define ICE_AQ_VSI_PVLAN_MODE_UNTAGGED 0x1
-#define ICE_AQ_VSI_PVLAN_MODE_TAGGED 0x2
-#define ICE_AQ_VSI_PVLAN_MODE_ALL 0x3
+ u8 vlan_flags;
+#define ICE_AQ_VSI_VLAN_MODE_S 0
+#define ICE_AQ_VSI_VLAN_MODE_M (0x3 << ICE_AQ_VSI_VLAN_MODE_S)
+#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED 0x1
+#define ICE_AQ_VSI_VLAN_MODE_TAGGED 0x2
+#define ICE_AQ_VSI_VLAN_MODE_ALL 0x3
#define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2)
-#define ICE_AQ_VSI_PVLAN_EMOD_S 3
-#define ICE_AQ_VSI_PVLAN_EMOD_M (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S)
-#define ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_PVLAN_EMOD_S)
-#define ICE_AQ_VSI_PVLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_PVLAN_EMOD_S)
-#define ICE_AQ_VSI_PVLAN_EMOD_STR (0x2 << ICE_AQ_VSI_PVLAN_EMOD_S)
-#define ICE_AQ_VSI_PVLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_S 3
+#define ICE_AQ_VSI_VLAN_EMOD_M (0x3 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_STR (0x2 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_VLAN_EMOD_S)
u8 pvlan_reserved2[3];
/* ingress egress up sections */
__le32 ingress_table; /* bitmap, 3 bits per up */
@@ -594,6 +594,7 @@ struct ice_sw_rule_lg_act {
#define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S)
#define ICE_LG_ACT_GENERIC_PRIORITY_S 22
#define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S)
+#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX 7
/* Action = 7 - Set Stat count */
#define ICE_LG_ACT_STAT_COUNT 0x7
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 71d032cc5fa7..661beea6af79 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -45,6 +45,9 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw)
/**
* ice_clear_pf_cfg - Clear PF configuration
* @hw: pointer to the hardware structure
+ *
+ * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port
+ * configuration, flow director filters, etc.).
*/
enum ice_status ice_clear_pf_cfg(struct ice_hw *hw)
{
@@ -1483,7 +1486,7 @@ enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up)
struct ice_phy_info *phy_info;
enum ice_status status = 0;
- if (!pi)
+ if (!pi || !link_up)
return ICE_ERR_PARAM;
phy_info = &pi->phy;
@@ -1619,20 +1622,23 @@ __ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
}
/* LUT size is only valid for Global and PF table types */
- if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128) {
- flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG <<
- ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
- ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
- } else if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512) {
+ switch (lut_size) {
+ case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128:
+ break;
+ case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512:
flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG <<
ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
- } else if ((lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K) &&
- (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF)) {
- flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG <<
- ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
- ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
- } else {
+ break;
+ case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K:
+ if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) {
+ flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG <<
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+ break;
+ }
+ /* fall-through */
+ default:
status = ICE_ERR_PARAM;
goto ice_aq_get_set_rss_lut_exit;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 7c511f144ed6..62be72fdc8f3 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -597,10 +597,14 @@ static enum ice_status ice_init_check_adminq(struct ice_hw *hw)
return 0;
init_ctrlq_free_rq:
- ice_shutdown_rq(hw, cq);
- ice_shutdown_sq(hw, cq);
- mutex_destroy(&cq->sq_lock);
- mutex_destroy(&cq->rq_lock);
+ if (cq->rq.head) {
+ ice_shutdown_rq(hw, cq);
+ mutex_destroy(&cq->rq_lock);
+ }
+ if (cq->sq.head) {
+ ice_shutdown_sq(hw, cq);
+ mutex_destroy(&cq->sq_lock);
+ }
return status;
}
@@ -706,10 +710,14 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
return;
}
- ice_shutdown_sq(hw, cq);
- ice_shutdown_rq(hw, cq);
- mutex_destroy(&cq->sq_lock);
- mutex_destroy(&cq->rq_lock);
+ if (cq->sq.head) {
+ ice_shutdown_sq(hw, cq);
+ mutex_destroy(&cq->sq_lock);
+ }
+ if (cq->rq.head) {
+ ice_shutdown_rq(hw, cq);
+ mutex_destroy(&cq->rq_lock);
+ }
}
/**
@@ -1057,8 +1065,11 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
clean_rq_elem_out:
/* Set pending if needed, unlock and return */
- if (pending)
+ if (pending) {
+ /* re-read HW head to calculate actual pending messages */
+ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
*pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc));
+ }
clean_rq_elem_err:
mutex_unlock(&cq->rq_lock);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 1db304c01d10..c71a9b528d6d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -26,7 +26,7 @@ static int ice_q_stats_len(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
- return ((np->vsi->num_txq + np->vsi->num_rxq) *
+ return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) *
(sizeof(struct ice_q_stats) / sizeof(u64)));
}
@@ -218,7 +218,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
}
- ice_for_each_txq(vsi, i) {
+ ice_for_each_alloc_txq(vsi, i) {
snprintf(p, ETH_GSTRING_LEN,
"tx-queue-%u.tx_packets", i);
p += ETH_GSTRING_LEN;
@@ -226,7 +226,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
}
- ice_for_each_rxq(vsi, i) {
+ ice_for_each_alloc_rxq(vsi, i) {
snprintf(p, ETH_GSTRING_LEN,
"rx-queue-%u.rx_packets", i);
p += ETH_GSTRING_LEN;
@@ -253,6 +253,24 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
{
switch (sset) {
case ETH_SS_STATS:
+ /* The number (and order) of strings reported *must* remain
+ * constant for a given netdevice. This function must not
+ * report a different number based on run time parameters
+ * (such as the number of queues in use, or the setting of
+ * a private ethtool flag). This is due to the nature of the
+ * ethtool stats API.
+ *
+ * User space programs such as ethtool must make 3 separate
+ * ioctl requests, one for size, one for the strings, and
+ * finally one for the stats. Since these cross into
+ * user space, changes to the number or size could result in
+ * undefined memory access or incorrect string<->value
+ * correlations for statistics.
+ *
+ * Even if it appears to be safe, changes to the size or
+ * order of strings will suffer from race conditions and are
+ * not safe.
+ */
return ICE_ALL_STATS_LEN(netdev);
default:
return -EOPNOTSUPP;
@@ -280,18 +298,26 @@ ice_get_ethtool_stats(struct net_device *netdev,
/* populate per queue stats */
rcu_read_lock();
- ice_for_each_txq(vsi, j) {
+ ice_for_each_alloc_txq(vsi, j) {
ring = READ_ONCE(vsi->tx_rings[j]);
- if (!ring)
- continue;
- data[i++] = ring->stats.pkts;
- data[i++] = ring->stats.bytes;
+ if (ring) {
+ data[i++] = ring->stats.pkts;
+ data[i++] = ring->stats.bytes;
+ } else {
+ data[i++] = 0;
+ data[i++] = 0;
+ }
}
- ice_for_each_rxq(vsi, j) {
+ ice_for_each_alloc_rxq(vsi, j) {
ring = READ_ONCE(vsi->rx_rings[j]);
- data[i++] = ring->stats.pkts;
- data[i++] = ring->stats.bytes;
+ if (ring) {
+ data[i++] = ring->stats.pkts;
+ data[i++] = ring->stats.bytes;
+ } else {
+ data[i++] = 0;
+ data[i++] = 0;
+ }
}
rcu_read_unlock();
@@ -519,7 +545,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
goto done;
}
- for (i = 0; i < vsi->num_txq; i++) {
+ for (i = 0; i < vsi->alloc_txq; i++) {
/* clone ring and setup updated count */
tx_rings[i] = *vsi->tx_rings[i];
tx_rings[i].count = new_tx_cnt;
@@ -551,7 +577,7 @@ process_rx:
goto done;
}
- for (i = 0; i < vsi->num_rxq; i++) {
+ for (i = 0; i < vsi->alloc_rxq; i++) {
/* clone ring and setup updated count */
rx_rings[i] = *vsi->rx_rings[i];
rx_rings[i].count = new_rx_cnt;
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 499904874b3f..6076fc87df9d 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -121,10 +121,6 @@
#define PFINT_FW_CTL_CAUSE_ENA_S 30
#define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S)
#define PFINT_OICR 0x0016CA00
-#define PFINT_OICR_HLP_RDY_S 14
-#define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S)
-#define PFINT_OICR_CPM_RDY_S 15
-#define PFINT_OICR_CPM_RDY_M BIT(PFINT_OICR_CPM_RDY_S)
#define PFINT_OICR_ECC_ERR_S 16
#define PFINT_OICR_ECC_ERR_M BIT(PFINT_OICR_ECC_ERR_S)
#define PFINT_OICR_MAL_DETECT_S 19
@@ -133,10 +129,6 @@
#define PFINT_OICR_GRST_M BIT(PFINT_OICR_GRST_S)
#define PFINT_OICR_PCI_EXCEPTION_S 21
#define PFINT_OICR_PCI_EXCEPTION_M BIT(PFINT_OICR_PCI_EXCEPTION_S)
-#define PFINT_OICR_GPIO_S 22
-#define PFINT_OICR_GPIO_M BIT(PFINT_OICR_GPIO_S)
-#define PFINT_OICR_STORM_DETECT_S 24
-#define PFINT_OICR_STORM_DETECT_M BIT(PFINT_OICR_STORM_DETECT_S)
#define PFINT_OICR_HMC_ERR_S 26
#define PFINT_OICR_HMC_ERR_M BIT(PFINT_OICR_HMC_ERR_S)
#define PFINT_OICR_PE_CRITERR_S 28
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index d23a91665b46..068dbc740b76 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -265,6 +265,7 @@ enum ice_rx_flex_desc_status_error_0_bits {
struct ice_rlan_ctx {
u16 head;
u16 cpuid; /* bigger than needed, see above for reason */
+#define ICE_RLAN_BASE_S 7
u64 base;
u16 qlen;
#define ICE_RLAN_CTX_DBUF_S 7
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5299caf55a7f..f1e80eed2fd6 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -901,7 +901,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
case ice_aqc_opc_get_link_status:
if (ice_handle_link_event(pf))
dev_err(&pf->pdev->dev,
- "Could not handle link event");
+ "Could not handle link event\n");
break;
default:
dev_dbg(&pf->pdev->dev,
@@ -917,13 +917,27 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
}
/**
+ * ice_ctrlq_pending - check if there is a difference between ntc and ntu
+ * @hw: pointer to hardware info
+ * @cq: control queue information
+ *
+ * returns true if there are pending messages in a queue, false if there aren't
+ */
+static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ u16 ntu;
+
+ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+ return cq->rq.next_to_clean != ntu;
+}
+
+/**
* ice_clean_adminq_subtask - clean the AdminQ rings
* @pf: board private structure
*/
static void ice_clean_adminq_subtask(struct ice_pf *pf)
{
struct ice_hw *hw = &pf->hw;
- u32 val;
if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
return;
@@ -933,9 +947,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf)
clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
- /* re-enable Admin queue interrupt causes */
- val = rd32(hw, PFINT_FW_CTL);
- wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M));
+ /* There might be a situation where new messages arrive to a control
+ * queue between processing the last message and clearing the
+ * EVENT_PENDING bit. So before exiting, check queue head again (using
+ * ice_ctrlq_pending) and process new messages if any.
+ */
+ if (ice_ctrlq_pending(hw, &hw->adminq))
+ __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
ice_flush(hw);
}
@@ -1295,11 +1313,8 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
qcount = numq_tc;
}
- /* find higher power-of-2 of qcount */
- pow = ilog2(qcount);
-
- if (!is_power_of_2(qcount))
- pow++;
+ /* find the (rounded up) power-of-2 of qcount */
+ pow = order_base_2(qcount);
for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
@@ -1352,14 +1367,15 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
/* Traffic from VSI can be sent to LAN */
ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
- /* Allow all packets untagged/tagged */
- ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL &
- ICE_AQ_VSI_PVLAN_MODE_M) >>
- ICE_AQ_VSI_PVLAN_MODE_S);
- /* Show VLAN/UP from packets in Rx descriptors */
- ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH &
- ICE_AQ_VSI_PVLAN_EMOD_M) >>
- ICE_AQ_VSI_PVLAN_EMOD_S);
+
+ /* By default bits 3 and 4 in vlan_flags are 0's which results in legacy
+ * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all
+ * packets untagged/tagged.
+ */
+ ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL &
+ ICE_AQ_VSI_VLAN_MODE_M) >>
+ ICE_AQ_VSI_VLAN_MODE_S);
+
/* Have 1:1 UP mapping for both ingress/egress tables */
table |= ICE_UP_TABLE_TRANSLATE(0, 0);
table |= ICE_UP_TABLE_TRANSLATE(1, 1);
@@ -1688,15 +1704,12 @@ static void ice_ena_misc_vector(struct ice_pf *pf)
wr32(hw, PFINT_OICR_ENA, 0); /* disable all */
rd32(hw, PFINT_OICR); /* read to clear */
- val = (PFINT_OICR_HLP_RDY_M |
- PFINT_OICR_CPM_RDY_M |
- PFINT_OICR_ECC_ERR_M |
+ val = (PFINT_OICR_ECC_ERR_M |
PFINT_OICR_MAL_DETECT_M |
PFINT_OICR_GRST_M |
PFINT_OICR_PCI_EXCEPTION_M |
- PFINT_OICR_GPIO_M |
- PFINT_OICR_STORM_DETECT_M |
- PFINT_OICR_HMC_ERR_M);
+ PFINT_OICR_HMC_ERR_M |
+ PFINT_OICR_PE_CRITERR_M);
wr32(hw, PFINT_OICR_ENA, val);
@@ -2058,15 +2071,13 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
skip_req_irq:
ice_ena_misc_vector(pf);
- val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
- (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) |
- PFINT_OICR_CTL_CAUSE_ENA_M;
+ val = ((pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
+ PFINT_OICR_CTL_CAUSE_ENA_M);
wr32(hw, PFINT_OICR_CTL, val);
/* This enables Admin queue Interrupt causes */
- val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) |
- (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) |
- PFINT_FW_CTL_CAUSE_ENA_M;
+ val = ((pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) |
+ PFINT_FW_CTL_CAUSE_ENA_M);
wr32(hw, PFINT_FW_CTL, val);
itr_gran = hw->itr_gran_200;
@@ -3246,8 +3257,10 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf)
if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
ice_dis_msix(pf);
- devm_kfree(&pf->pdev->dev, pf->irq_tracker);
- pf->irq_tracker = NULL;
+ if (pf->irq_tracker) {
+ devm_kfree(&pf->pdev->dev, pf->irq_tracker);
+ pf->irq_tracker = NULL;
+ }
}
/**
@@ -3271,7 +3284,7 @@ static int ice_probe(struct pci_dev *pdev,
err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev));
if (err) {
- dev_err(&pdev->dev, "I/O map error %d\n", err);
+ dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err);
return err;
}
@@ -3720,10 +3733,10 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
enum ice_status status;
/* Here we are configuring the VSI to let the driver add VLAN tags by
- * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN
- * tag insertion happens in the Tx hot path, in ice_tx_map.
+ * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag
+ * insertion happens in the Tx hot path, in ice_tx_map.
*/
- ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL;
+ ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL;
ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
ctxt.vsi_num = vsi->vsi_num;
@@ -3735,7 +3748,7 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
return -EIO;
}
- vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags;
+ vsi->info.vlan_flags = ctxt.info.vlan_flags;
return 0;
}
@@ -3757,12 +3770,15 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
*/
if (ena) {
/* Strip VLAN tag from Rx packet and put it in the desc */
- ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH;
+ ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH;
} else {
/* Disable stripping. Leave tag in packet */
- ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING;
+ ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING;
}
+ /* Allow all packets untagged/tagged */
+ ctxt.info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL;
+
ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
ctxt.vsi_num = vsi->vsi_num;
@@ -3773,7 +3789,7 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
return -EIO;
}
- vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags;
+ vsi->info.vlan_flags = ctxt.info.vlan_flags;
return 0;
}
@@ -3986,7 +4002,7 @@ static int ice_setup_rx_ctx(struct ice_ring *ring)
/* clear the context structure first */
memset(&rlan_ctx, 0, sizeof(rlan_ctx));
- rlan_ctx.base = ring->dma >> 7;
+ rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S;
rlan_ctx.qlen = ring->count;
@@ -4098,11 +4114,12 @@ static int ice_vsi_cfg(struct ice_vsi *vsi)
{
int err;
- ice_set_rx_mode(vsi->netdev);
-
- err = ice_restore_vlan(vsi);
- if (err)
- return err;
+ if (vsi->netdev) {
+ ice_set_rx_mode(vsi->netdev);
+ err = ice_restore_vlan(vsi);
+ if (err)
+ return err;
+ }
err = ice_vsi_cfg_txqs(vsi);
if (!err)
@@ -4868,7 +4885,7 @@ int ice_down(struct ice_vsi *vsi)
*/
static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
{
- int i, err;
+ int i, err = 0;
if (!vsi->num_txq) {
dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n",
@@ -4893,7 +4910,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
*/
static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
{
- int i, err;
+ int i, err = 0;
if (!vsi->num_rxq) {
dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n",
@@ -5235,7 +5252,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
u8 count = 0;
if (new_mtu == netdev->mtu) {
- netdev_warn(netdev, "mtu is already %d\n", netdev->mtu);
+ netdev_warn(netdev, "mtu is already %u\n", netdev->mtu);
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 92da0a626ce0..295a8cd87fc1 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -131,9 +131,8 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
*
* This function will request NVM ownership.
*/
-static enum
-ice_status ice_acquire_nvm(struct ice_hw *hw,
- enum ice_aq_res_access_type access)
+static enum ice_status
+ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access)
{
if (hw->nvm.blank_nvm_mode)
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 2e6c1d92cc88..eeae199469b6 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1576,8 +1576,7 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc,
return status;
}
- if (owner == ICE_SCHED_NODE_OWNER_LAN)
- vsi->max_lanq[tc] = new_numqs;
+ vsi->max_lanq[tc] = new_numqs;
return status;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 723d15f1e90b..6b7ec2ae5ad6 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -645,14 +645,14 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent,
act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M;
lg_act->pdata.lg_act.act[1] = cpu_to_le32(act);
- act = (7 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+ act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX <<
+ ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M;
/* Third action Marker value */
act |= ICE_LG_ACT_GENERIC;
act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) &
ICE_LG_ACT_GENERIC_VALUE_M;
- act |= (0 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M;
lg_act->pdata.lg_act.act[2] = cpu_to_le32(act);
/* call the fill switch rule to fill the lookup tx rx structure */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 6f4a0d159dbf..9b8ec128ee31 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -17,7 +17,7 @@ struct ice_vsi_ctx {
u16 vsis_unallocated;
u16 flags;
struct ice_aqc_vsi_props info;
- bool alloc_from_pool;
+ u8 alloc_from_pool;
};
enum ice_sw_fwd_act_type {
@@ -94,8 +94,8 @@ struct ice_fltr_info {
u8 qgrp_size;
/* Rule creations populate these indicators basing on the switch type */
- bool lb_en; /* Indicate if packet can be looped back */
- bool lan_en; /* Indicate if packet can be forwarded to the uplink */
+ u8 lb_en; /* Indicate if packet can be looped back */
+ u8 lan_en; /* Indicate if packet can be forwarded to the uplink */
};
/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 567067b650c4..31bc998fe200 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -143,7 +143,7 @@ struct ice_ring {
u16 next_to_use;
u16 next_to_clean;
- bool ring_active; /* is ring online or not */
+ u8 ring_active; /* is ring online or not */
/* stats structs */
struct ice_q_stats stats;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 99c8a9a71b5e..97c366e0ca59 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -83,7 +83,7 @@ struct ice_link_status {
u64 phy_type_low;
u16 max_frame_size;
u16 link_speed;
- bool lse_ena; /* Link Status Event notification */
+ u8 lse_ena; /* Link Status Event notification */
u8 link_info;
u8 an_info;
u8 ext_info;
@@ -101,7 +101,7 @@ struct ice_phy_info {
struct ice_link_status link_info_old;
u64 phy_type_low;
enum ice_media_type media_type;
- bool get_link_info;
+ u8 get_link_info;
};
/* Common HW capabilities for SW use */
@@ -167,7 +167,7 @@ struct ice_nvm_info {
u32 oem_ver; /* OEM version info */
u16 sr_words; /* Shadow RAM size in words */
u16 ver; /* NVM package version */
- bool blank_nvm_mode; /* is NVM empty (no FW present) */
+ u8 blank_nvm_mode; /* is NVM empty (no FW present) */
};
/* Max number of port to queue branches w.r.t topology */
@@ -181,7 +181,7 @@ struct ice_sched_node {
struct ice_aqc_txsched_elem_data info;
u32 agg_id; /* aggregator group id */
u16 vsi_id;
- bool in_use; /* suspended or in use */
+ u8 in_use; /* suspended or in use */
u8 tx_sched_layer; /* Logical Layer (1-9) */
u8 num_children;
u8 tc_num;
@@ -218,7 +218,7 @@ struct ice_sched_vsi_info {
struct ice_sched_tx_policy {
u16 max_num_vsis;
u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS];
- bool rdma_ena;
+ u8 rdma_ena;
};
struct ice_port_info {
@@ -243,7 +243,7 @@ struct ice_port_info {
struct list_head agg_list; /* lists all aggregator */
u8 lport;
#define ICE_LPORT_MASK 0xff
- bool is_vf;
+ u8 is_vf;
};
struct ice_switch_info {
@@ -287,7 +287,7 @@ struct ice_hw {
u8 max_cgds;
u8 sw_entry_point_layer;
- bool evb_veb; /* true for VEB, false for VEPA */
+ u8 evb_veb; /* true for VEB, false for VEPA */
struct ice_bus_info bus;
struct ice_nvm_info nvm;
struct ice_hw_dev_caps dev_caps; /* device capabilities */
@@ -318,7 +318,7 @@ struct ice_hw {
u8 itr_gran_100;
u8 itr_gran_50;
u8 itr_gran_25;
- bool ucast_shared; /* true if VSIs can share unicast addr */
+ u8 ucast_shared; /* true if VSIs can share unicast addr */
};
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index f92f7918112d..5acf3b743876 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1649,7 +1649,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
if (hw->phy.type == e1000_phy_m88)
igb_phy_disable_receiver(adapter);
- mdelay(500);
+ msleep(500);
return 0;
}
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index d03c2f0d7592..a32c576c1e65 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3873,7 +3873,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
adapter->mac_table = kcalloc(hw->mac.rar_entry_count,
sizeof(struct igb_mac_addr),
- GFP_ATOMIC);
+ GFP_KERNEL);
if (!adapter->mac_table)
return -ENOMEM;
@@ -3883,7 +3883,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
/* Setup and initialize a copy of the hw vlan table array */
adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
- GFP_ATOMIC);
+ GFP_KERNEL);
if (!adapter->shadow_vfta)
return -ENOMEM;
@@ -5816,7 +5816,8 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_failed:
- if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
+ if (!(first->tx_flags & IGB_TX_FLAGS_VLAN) &&
+ !tx_ring->launchtime_enable)
return;
goto no_csum;
}
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 43664adf7a3c..d3e72d0f66ef 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -771,14 +771,13 @@ ixgb_setup_rx_resources(struct ixgb_adapter *adapter)
rxdr->size = rxdr->count * sizeof(struct ixgb_rx_desc);
rxdr->size = ALIGN(rxdr->size, 4096);
- rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
- GFP_KERNEL);
+ rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
+ GFP_KERNEL);
if (!rxdr->desc) {
vfree(rxdr->buffer_info);
return -ENOMEM;
}
- memset(rxdr->desc, 0, rxdr->size);
rxdr->next_to_clean = 0;
rxdr->next_to_use = 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 94b3165ff543..ccd852ad62a4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -192,7 +192,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
}
/* alloc the udl from per cpu ddp pool */
- ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp);
+ ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_KERNEL, &ddp->udp);
if (!ddp->udl) {
e_err(drv, "failed allocated ddp context\n");
goto out_noddp_unmap;
@@ -760,7 +760,7 @@ int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter)
return 0;
/* Extra buffer to be shared by all DDPs for HW work around */
- buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC);
+ buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 447098005490..9a23d33a47ed 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6201,7 +6201,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
adapter->mac_table = kcalloc(hw->mac.num_rar_entries,
sizeof(struct ixgbe_mac_addr),
- GFP_ATOMIC);
+ GFP_KERNEL);
if (!adapter->mac_table)
return -ENOMEM;
@@ -6620,8 +6620,18 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
struct ixgbe_adapter *adapter = netdev_priv(netdev);
if (adapter->xdp_prog) {
- e_warn(probe, "MTU cannot be changed while XDP program is loaded\n");
- return -EPERM;
+ int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN +
+ VLAN_HLEN;
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+ if (new_frame_size > ixgbe_rx_bufsz(ring)) {
+ e_warn(probe, "Requested MTU size is not supported with XDP\n");
+ return -EINVAL;
+ }
+ }
}
/*
@@ -8983,6 +8993,15 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
#ifdef CONFIG_IXGBE_DCB
if (tc) {
+ if (adapter->xdp_prog) {
+ e_warn(probe, "DCB is not supported with XDP\n");
+
+ ixgbe_init_interrupt_scheme(adapter);
+ if (netif_running(dev))
+ ixgbe_open(dev);
+ return -EINVAL;
+ }
+
netdev_set_num_tc(dev, tc);
ixgbe_set_prio_tc_map(adapter);
@@ -9171,14 +9190,12 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
struct tcf_exts *exts, u64 *action, u8 *queue)
{
const struct tc_action *a;
- LIST_HEAD(actions);
+ int i;
if (!tcf_exts_has_actions(exts))
return -EINVAL;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
-
+ tcf_exts_for_each_action(i, a, exts) {
/* Drop action */
if (is_tcf_gact_shot(a)) {
*action = IXGBE_FDIR_DROP_QUEUE;
@@ -9936,6 +9953,11 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
int tcs = adapter->hw_tcs ? : 1;
int pool, err;
+ if (adapter->xdp_prog) {
+ e_warn(probe, "L2FW offload is not supported with XDP\n");
+ return ERR_PTR(-EINVAL);
+ }
+
/* The hardware supported by ixgbe only filters on the destination MAC
* address. In order to avoid issues we only support offloading modes
* where the hardware can actually provide the functionality.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 6f59933cdff7..3c6f01c41b78 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -53,6 +53,11 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
struct ixgbe_hw *hw = &adapter->hw;
int i;
+ if (adapter->xdp_prog) {
+ e_warn(probe, "SRIOV is not supported with XDP\n");
+ return -EINVAL;
+ }
+
/* Enable VMDq flag so device will be set in VM mode */
adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED |
IXGBE_FLAG_VMDQ_ENABLED;
@@ -688,8 +693,13 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
{
struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+ u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
u8 num_tcs = adapter->hw_tcs;
+ u32 reg_val;
+ u32 queue;
+ u32 word;
/* remove VLAN filters beloning to this VF */
ixgbe_clear_vf_vlans(adapter, vf);
@@ -726,6 +736,27 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
/* reset VF api back to unknown */
adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10;
+
+ /* Restart each queue for given VF */
+ for (queue = 0; queue < q_per_pool; queue++) {
+ unsigned int reg_idx = (vf * q_per_pool) + queue;
+
+ reg_val = IXGBE_READ_REG(hw, IXGBE_PVFTXDCTL(reg_idx));
+
+ /* Re-enabling only configured queues */
+ if (reg_val) {
+ reg_val |= IXGBE_TXDCTL_ENABLE;
+ IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val);
+ reg_val &= ~IXGBE_TXDCTL_ENABLE;
+ IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val);
+ }
+ }
+
+ /* Clear VF's mailbox memory */
+ for (word = 0; word < IXGBE_VFMAILBOX_SIZE; word++)
+ IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf), word, 0);
+
+ IXGBE_WRITE_FLUSH(hw);
}
static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 44cfb2021145..41bcbb337e83 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2518,6 +2518,7 @@ enum {
/* Translated register #defines */
#define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P)))
#define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P)))
+#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P)))
#define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P)))
#define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P)))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 8cac8e9c8c63..25a9b8a8f63e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1978,14 +1978,15 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
goto out_ok;
modify_ip_header = false;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, exts) {
+ int k;
+
if (!is_tcf_pedit(a))
continue;
nkeys = tcf_pedit_nkeys(a);
- for (i = 0; i < nkeys; i++) {
- htype = tcf_pedit_htype(a, i);
+ for (k = 0; k < nkeys; k++) {
+ htype = tcf_pedit_htype(a, k);
if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 ||
htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) {
modify_ip_header = true;
@@ -2049,15 +2050,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
const struct tc_action *a;
LIST_HEAD(actions);
u32 action = 0;
- int err;
+ int err, i;
if (!tcf_exts_has_actions(exts))
return -EINVAL;
attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, exts) {
if (is_tcf_gact_shot(a)) {
action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
if (MLX5_CAP_FLOWTABLE(priv->mdev,
@@ -2666,7 +2666,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
LIST_HEAD(actions);
bool encap = false;
u32 action = 0;
- int err;
+ int err, i;
if (!tcf_exts_has_actions(exts))
return -EINVAL;
@@ -2674,8 +2674,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
attr->in_rep = rpriv->rep;
attr->in_mdev = priv->mdev;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, exts) {
if (is_tcf_gact_shot(a)) {
action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6070d1591d1e..930700413b1d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1346,8 +1346,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
return -ENOMEM;
mall_tc_entry->cookie = f->cookie;
- tcf_exts_to_list(f->exts, &actions);
- a = list_first_entry(&actions, struct tc_action, list);
+ a = tcf_exts_first_action(f->exts);
if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
struct mlxsw_sp_port_mall_mirror_tc_entry *mirror;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 3ae930196741..3cdb7aca90b7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -414,6 +414,8 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
+void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev);
/* spectrum_kvdl.c */
enum mlxsw_sp_kvdl_entry_type {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index ebd1b24ebaa5..8d211972c5e9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -21,8 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
struct netlink_ext_ack *extack)
{
const struct tc_action *a;
- LIST_HEAD(actions);
- int err;
+ int err, i;
if (!tcf_exts_has_actions(exts))
return 0;
@@ -32,8 +31,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, exts) {
if (is_tcf_gact_ok(a)) {
err = mlxsw_sp_acl_rulei_act_terminate(rulei);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 3a96307f51b0..2ab9cf25a08a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -6234,6 +6234,17 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
+void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev)
+{
+ struct mlxsw_sp_rif *rif;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return;
+ mlxsw_sp_rif_destroy(rif);
+}
+
static void
mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 0d8444aaba01..db715da7bab7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -127,6 +127,24 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
}
+static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev,
+ void *data)
+{
+ struct mlxsw_sp *mlxsw_sp = data;
+
+ mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev);
+ return 0;
+}
+
+static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev)
+{
+ mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev);
+ netdev_walk_all_upper_dev_rcu(dev,
+ mlxsw_sp_bridge_device_upper_rif_destroy,
+ mlxsw_sp);
+}
+
static struct mlxsw_sp_bridge_device *
mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
struct net_device *br_dev)
@@ -165,6 +183,8 @@ static void
mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge,
struct mlxsw_sp_bridge_device *bridge_device)
{
+ mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp,
+ bridge_device->dev);
list_del(&bridge_device->list);
if (bridge_device->vlan_enabled)
bridge->vlan_enabled_exists = false;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 0ba0356ec4e6..9044496803e6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -796,11 +796,10 @@ int nfp_flower_compile_action(struct nfp_app *app,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow)
{
- int act_len, act_cnt, err, tun_out_cnt, out_cnt;
+ int act_len, act_cnt, err, tun_out_cnt, out_cnt, i;
enum nfp_flower_tun_type tun_type;
const struct tc_action *a;
u32 csum_updated = 0;
- LIST_HEAD(actions);
memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
nfp_flow->meta.act_len = 0;
@@ -810,8 +809,7 @@ int nfp_flower_compile_action(struct nfp_app *app,
tun_out_cnt = 0;
out_cnt = 0;
- tcf_exts_to_list(flow->exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, flow->exts) {
err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len,
netdev, &tun_type, &tun_out_cnt,
&out_cnt, &csum_updated);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index d9ab5add27a8..34193c2f1699 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -407,7 +407,7 @@ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn,
if (i == QED_INIT_MAX_POLL_COUNT) {
DP_ERR(p_hwfn,
- "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n",
+ "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparison %08x)]\n",
addr, le32_to_cpu(cmd->expected_val),
val, le32_to_cpu(cmd->op_data));
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index d89a0e22f6e4..5d37ec7e9b0b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -48,7 +48,7 @@
#include "qed_reg_addr.h"
#include "qed_sriov.h"
-#define CHIP_MCP_RESP_ITER_US 10
+#define QED_MCP_RESP_ITER_US 10
#define QED_DRV_MB_MAX_RETRIES (500 * 1000) /* Account for 5 sec */
#define QED_MCP_RESET_RETRIES (50 * 1000) /* Account for 500 msec */
@@ -183,18 +183,57 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn)
return 0;
}
+/* Maximum of 1 sec to wait for the SHMEM ready indication */
+#define QED_MCP_SHMEM_RDY_MAX_RETRIES 20
+#define QED_MCP_SHMEM_RDY_ITER_MS 50
+
static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mcp_info *p_info = p_hwfn->mcp_info;
+ u8 cnt = QED_MCP_SHMEM_RDY_MAX_RETRIES;
+ u8 msec = QED_MCP_SHMEM_RDY_ITER_MS;
u32 drv_mb_offsize, mfw_mb_offsize;
u32 mcp_pf_id = MCP_PF_ID(p_hwfn);
p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR);
- if (!p_info->public_base)
- return 0;
+ if (!p_info->public_base) {
+ DP_NOTICE(p_hwfn,
+ "The address of the MCP scratch-pad is not configured\n");
+ return -EINVAL;
+ }
p_info->public_base |= GRCBASE_MCP;
+ /* Get the MFW MB address and number of supported messages */
+ mfw_mb_offsize = qed_rd(p_hwfn, p_ptt,
+ SECTION_OFFSIZE_ADDR(p_info->public_base,
+ PUBLIC_MFW_MB));
+ p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
+ p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt,
+ p_info->mfw_mb_addr +
+ offsetof(struct public_mfw_mb,
+ sup_msgs));
+
+ /* The driver can notify that there was an MCP reset, and might read the
+ * SHMEM values before the MFW has completed initializing them.
+ * To avoid this, the "sup_msgs" field in the MFW mailbox is used as a
+ * data ready indication.
+ */
+ while (!p_info->mfw_mb_length && --cnt) {
+ msleep(msec);
+ p_info->mfw_mb_length =
+ (u16)qed_rd(p_hwfn, p_ptt,
+ p_info->mfw_mb_addr +
+ offsetof(struct public_mfw_mb, sup_msgs));
+ }
+
+ if (!cnt) {
+ DP_NOTICE(p_hwfn,
+ "Failed to get the SHMEM ready notification after %d msec\n",
+ QED_MCP_SHMEM_RDY_MAX_RETRIES * msec);
+ return -EBUSY;
+ }
+
/* Calculate the driver and MFW mailbox address */
drv_mb_offsize = qed_rd(p_hwfn, p_ptt,
SECTION_OFFSIZE_ADDR(p_info->public_base,
@@ -204,13 +243,6 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
"drv_mb_offsiz = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 0x%x\n",
drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id);
- /* Set the MFW MB address */
- mfw_mb_offsize = qed_rd(p_hwfn, p_ptt,
- SECTION_OFFSIZE_ADDR(p_info->public_base,
- PUBLIC_MFW_MB));
- p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
- p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr);
-
/* Get the current driver mailbox sequence before sending
* the first command
*/
@@ -285,9 +317,15 @@ static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn,
int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
- u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0;
+ u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0;
int rc = 0;
+ if (p_hwfn->mcp_info->b_block_cmd) {
+ DP_NOTICE(p_hwfn,
+ "The MFW is not responsive. Avoid sending MCP_RESET mailbox command.\n");
+ return -EBUSY;
+ }
+
/* Ensure that only a single thread is accessing the mailbox */
spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
@@ -413,14 +451,41 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
(p_mb_params->cmd | seq_num), p_mb_params->param);
}
+static void qed_mcp_cmd_set_blocking(struct qed_hwfn *p_hwfn, bool block_cmd)
+{
+ p_hwfn->mcp_info->b_block_cmd = block_cmd;
+
+ DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n",
+ block_cmd ? "Block" : "Unblock");
+}
+
+static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2;
+ u32 delay = QED_MCP_RESP_ITER_US;
+
+ cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+ cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+ cpu_pc_0 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+ udelay(delay);
+ cpu_pc_1 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+ udelay(delay);
+ cpu_pc_2 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+
+ DP_NOTICE(p_hwfn,
+ "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 0x%08x}\n",
+ cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2);
+}
+
static int
_qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct qed_mcp_mb_params *p_mb_params,
- u32 max_retries, u32 delay)
+ u32 max_retries, u32 usecs)
{
+ u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000);
struct qed_mcp_cmd_elem *p_cmd_elem;
- u32 cnt = 0;
u16 seq_num;
int rc = 0;
@@ -443,7 +508,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
goto err;
spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
- udelay(delay);
+
+ if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP))
+ msleep(msecs);
+ else
+ udelay(usecs);
} while (++cnt < max_retries);
if (cnt >= max_retries) {
@@ -472,7 +541,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
* The spinlock stays locked until the list element is removed.
*/
- udelay(delay);
+ if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP))
+ msleep(msecs);
+ else
+ udelay(usecs);
+
spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
if (p_cmd_elem->b_is_completed)
@@ -491,11 +564,15 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
DP_NOTICE(p_hwfn,
"The MFW failed to respond to command 0x%08x [param 0x%08x].\n",
p_mb_params->cmd, p_mb_params->param);
+ qed_mcp_print_cpu_info(p_hwfn, p_ptt);
spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
+ if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
+ qed_mcp_cmd_set_blocking(p_hwfn, true);
+
return -EAGAIN;
}
@@ -507,7 +584,7 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
"MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n",
p_mb_params->mcp_resp,
p_mb_params->mcp_param,
- (cnt * delay) / 1000, (cnt * delay) % 1000);
+ (cnt * usecs) / 1000, (cnt * usecs) % 1000);
/* Clear the sequence number from the MFW response */
p_mb_params->mcp_resp &= FW_MSG_CODE_MASK;
@@ -525,7 +602,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
{
size_t union_data_size = sizeof(union drv_union_data);
u32 max_retries = QED_DRV_MB_MAX_RETRIES;
- u32 delay = CHIP_MCP_RESP_ITER_US;
+ u32 usecs = QED_MCP_RESP_ITER_US;
/* MCP not initialized */
if (!qed_mcp_is_init(p_hwfn)) {
@@ -533,6 +610,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
return -EBUSY;
}
+ if (p_hwfn->mcp_info->b_block_cmd) {
+ DP_NOTICE(p_hwfn,
+ "The MFW is not responsive. Avoid sending mailbox command 0x%08x [param 0x%08x].\n",
+ p_mb_params->cmd, p_mb_params->param);
+ return -EBUSY;
+ }
+
if (p_mb_params->data_src_size > union_data_size ||
p_mb_params->data_dst_size > union_data_size) {
DP_ERR(p_hwfn,
@@ -542,8 +626,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
return -EINVAL;
}
+ if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) {
+ max_retries = DIV_ROUND_UP(max_retries, 1000);
+ usecs *= 1000;
+ }
+
return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries,
- delay);
+ usecs);
}
int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
@@ -761,6 +850,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn,
mb_params.data_src_size = sizeof(load_req);
mb_params.p_data_dst = &load_rsp;
mb_params.data_dst_size = sizeof(load_rsp);
+ mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
DP_VERBOSE(p_hwfn, QED_MSG_SP,
"Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n",
@@ -982,7 +1072,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
- u32 wol_param, mcp_resp, mcp_param;
+ struct qed_mcp_mb_params mb_params;
+ u32 wol_param;
switch (p_hwfn->cdev->wol_config) {
case QED_OV_WOL_DISABLED:
@@ -1000,8 +1091,12 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP;
}
- return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_UNLOAD_REQ, wol_param,
- &mcp_resp, &mcp_param);
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ;
+ mb_params.param = wol_param;
+ mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
+
+ return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
}
int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
@@ -2077,31 +2172,65 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
return rc;
}
+/* A maximal 100 msec waiting time for the MCP to halt */
+#define QED_MCP_HALT_SLEEP_MS 10
+#define QED_MCP_HALT_MAX_RETRIES 10
+
int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
- u32 resp = 0, param = 0;
+ u32 resp = 0, param = 0, cpu_state, cnt = 0;
int rc;
rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp,
&param);
- if (rc)
+ if (rc) {
DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+ return rc;
+ }
- return rc;
+ do {
+ msleep(QED_MCP_HALT_SLEEP_MS);
+ cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+ if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED)
+ break;
+ } while (++cnt < QED_MCP_HALT_MAX_RETRIES);
+
+ if (cnt == QED_MCP_HALT_MAX_RETRIES) {
+ DP_NOTICE(p_hwfn,
+ "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n",
+ qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state);
+ return -EBUSY;
+ }
+
+ qed_mcp_cmd_set_blocking(p_hwfn, true);
+
+ return 0;
}
+#define QED_MCP_RESUME_SLEEP_MS 10
+
int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
- u32 value, cpu_mode;
+ u32 cpu_mode, cpu_state;
qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
- value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
- value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
- qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+ cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+ qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode);
+ msleep(QED_MCP_RESUME_SLEEP_MS);
+ cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
- return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
+ if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) {
+ DP_NOTICE(p_hwfn,
+ "Failed to resume the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n",
+ cpu_mode, cpu_state);
+ return -EBUSY;
+ }
+
+ qed_mcp_cmd_set_blocking(p_hwfn, false);
+
+ return 0;
}
int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 047976d5c6e9..85e6b3989e7a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -635,11 +635,14 @@ struct qed_mcp_info {
*/
spinlock_t cmd_lock;
+ /* Flag to indicate whether sending a MFW mailbox command is blocked */
+ bool b_block_cmd;
+
/* Spinlock used for syncing SW link-changes and link-changes
* originating from attention context.
*/
spinlock_t link_lock;
- bool block_mb_sending;
+
u32 public_base;
u32 drv_mb_addr;
u32 mfw_mb_addr;
@@ -660,14 +663,20 @@ struct qed_mcp_info {
};
struct qed_mcp_mb_params {
- u32 cmd;
- u32 param;
- void *p_data_src;
- u8 data_src_size;
- void *p_data_dst;
- u8 data_dst_size;
- u32 mcp_resp;
- u32 mcp_param;
+ u32 cmd;
+ u32 param;
+ void *p_data_src;
+ void *p_data_dst;
+ u8 data_src_size;
+ u8 data_dst_size;
+ u32 mcp_resp;
+ u32 mcp_param;
+ u32 flags;
+#define QED_MB_FLAG_CAN_SLEEP (0x1 << 0)
+#define QED_MB_FLAG_AVOID_BLOCK (0x1 << 1)
+#define QED_MB_FLAGS_IS_SET(params, flag) \
+ ({ typeof(params) __params = (params); \
+ (__params && (__params->flags & QED_MB_FLAG_ ## flag)); })
};
struct qed_drv_tlv_hdr {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index d8ad2dcad8d5..f736f70956fd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -562,8 +562,10 @@
0
#define MCP_REG_CPU_STATE \
0xe05004UL
+#define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10)
#define MCP_REG_CPU_EVENT_MASK \
0xe05008UL
+#define MCP_REG_CPU_PROGRAM_COUNTER 0xe0501cUL
#define PGLUE_B_REG_PF_BAR0_SIZE \
0x2aae60UL
#define PGLUE_B_REG_PF_BAR1_SIZE \
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index 9673d19308e6..b16ce7d93caf 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -2006,18 +2006,16 @@ unlock:
static int qede_parse_actions(struct qede_dev *edev,
struct tcf_exts *exts)
{
- int rc = -EINVAL, num_act = 0;
+ int rc = -EINVAL, num_act = 0, i;
const struct tc_action *a;
bool is_drop = false;
- LIST_HEAD(actions);
if (!tcf_exts_has_actions(exts)) {
DP_NOTICE(edev, "No tc actions received\n");
return rc;
}
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
+ tcf_exts_for_each_action(i, a, exts) {
num_act++;
if (is_tcf_gact_shot(a))
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 353f1c129af1..059ba9429e51 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2384,26 +2384,20 @@ static int qlge_update_hw_vlan_features(struct net_device *ndev,
return status;
}
-static netdev_features_t qlge_fix_features(struct net_device *ndev,
- netdev_features_t features)
-{
- int err;
-
- /* Update the behavior of vlan accel in the adapter */
- err = qlge_update_hw_vlan_features(ndev, features);
- if (err)
- return err;
-
- return features;
-}
-
static int qlge_set_features(struct net_device *ndev,
netdev_features_t features)
{
netdev_features_t changed = ndev->features ^ features;
+ int err;
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
+ /* Update the behavior of vlan accel in the adapter */
+ err = qlge_update_hw_vlan_features(ndev, features);
+ if (err)
+ return err;
- if (changed & NETIF_F_HW_VLAN_CTAG_RX)
qlge_vlan_mode(ndev, features);
+ }
return 0;
}
@@ -4719,7 +4713,6 @@ static const struct net_device_ops qlge_netdev_ops = {
.ndo_set_mac_address = qlge_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = qlge_tx_timeout,
- .ndo_fix_features = qlge_fix_features,
.ndo_set_features = qlge_set_features,
.ndo_vlan_rx_add_vid = qlge_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = qlge_vlan_rx_kill_vid,
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index b81f4faf7b10..1470fc12282b 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Renesas Ethernet AVB device driver
*
* Copyright (C) 2014-2015 Renesas Electronics Corporation
@@ -5,10 +6,6 @@
* Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com>
*
* Based on the SuperH Ethernet driver
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License version 2,
- * as published by the Free Software Foundation.
*/
#ifndef __RAVB_H__
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index c06f2df895c2..aff5516b781e 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Renesas Ethernet AVB device driver
*
* Copyright (C) 2014-2015 Renesas Electronics Corporation
@@ -5,10 +6,6 @@
* Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com>
*
* Based on the SuperH Ethernet driver
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License version 2,
- * as published by the Free Software Foundation.
*/
#include <linux/cache.h>
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 5573199c4536..ad4433d59237 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* SuperH Ethernet device driver
*
* Copyright (C) 2014 Renesas Electronics Corporation
@@ -5,18 +6,6 @@
* Copyright (C) 2008-2014 Renesas Solutions Corp.
* Copyright (C) 2013-2017 Cogent Embedded, Inc.
* Copyright (C) 2014 Codethink Limited
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
*/
#include <linux/module.h>
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index f94be99cf400..0c18650bbfe6 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -1,19 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* SuperH Ethernet device driver
*
* Copyright (C) 2006-2012 Nobuhiro Iwamatsu
* Copyright (C) 2008-2012 Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
*/
#ifndef __SH_ETH_H__
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index edf20361ea5f..bf4acebb6bcd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -33,7 +33,7 @@ config DWMAC_DWC_QOS_ETH
select PHYLIB
select CRC32
select MII
- depends on OF && COMMON_CLK && HAS_DMA
+ depends on OF && HAS_DMA
help
Support for chips using the snps,dwc-qos-ethernet.txt DT binding.
@@ -57,7 +57,7 @@ config DWMAC_ANARION
config DWMAC_IPQ806X
tristate "QCA IPQ806x DWMAC support"
default ARCH_QCOM
- depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST)
+ depends on OF && (ARCH_QCOM || COMPILE_TEST)
select MFD_SYSCON
help
Support for QCA IPQ806X DWMAC Ethernet.
@@ -100,7 +100,7 @@ config DWMAC_OXNAS
config DWMAC_ROCKCHIP
tristate "Rockchip dwmac support"
default ARCH_ROCKCHIP
- depends on OF && COMMON_CLK && (ARCH_ROCKCHIP || COMPILE_TEST)
+ depends on OF && (ARCH_ROCKCHIP || COMPILE_TEST)
select MFD_SYSCON
help
Support for Ethernet controller on Rockchip RK3288 SoC.
@@ -123,7 +123,7 @@ config DWMAC_SOCFPGA
config DWMAC_STI
tristate "STi GMAC support"
default ARCH_STI
- depends on OF && COMMON_CLK && (ARCH_STI || COMPILE_TEST)
+ depends on OF && (ARCH_STI || COMPILE_TEST)
select MFD_SYSCON
---help---
Support for ethernet controller on STi SOCs.
@@ -147,7 +147,7 @@ config DWMAC_STM32
config DWMAC_SUNXI
tristate "Allwinner GMAC support"
default ARCH_SUNXI
- depends on OF && COMMON_CLK && (ARCH_SUNXI || COMPILE_TEST)
+ depends on OF && (ARCH_SUNXI || COMPILE_TEST)
---help---
Support for Allwinner A20/A31 GMAC ethernet controllers.
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 1a96dd9c1091..531294f4978b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -61,7 +61,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry,
struct stmmac_tc_entry *action_entry = entry;
const struct tc_action *act;
struct tcf_exts *exts;
- LIST_HEAD(actions);
+ int i;
exts = cls->knode.exts;
if (!tcf_exts_has_actions(exts))
@@ -69,8 +69,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry,
if (frag)
action_entry = frag;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(act, &actions, list) {
+ tcf_exts_for_each_action(i, act, exts) {
/* Accept */
if (is_tcf_gact_ok(act)) {
action_entry->val.af = 1;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 507f68190cb1..1121a1ec407c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -29,6 +29,7 @@
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
+#include <linux/pci.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
@@ -2039,12 +2040,16 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
{
struct net_device *ndev;
struct net_device_context *net_device_ctx;
+ struct device *pdev = vf_netdev->dev.parent;
struct netvsc_device *netvsc_dev;
int ret;
if (vf_netdev->addr_len != ETH_ALEN)
return NOTIFY_DONE;
+ if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev))
+ return NOTIFY_DONE;
+
/*
* We will use the MAC address to locate the synthetic interface to
* associate with the VF interface. If we don't find a matching
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 97742708460b..2cd71bdb6484 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -5217,8 +5217,8 @@ static int rtl8152_probe(struct usb_interface *intf,
netdev->hw_features &= ~NETIF_F_RXCSUM;
}
- if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 &&
- udev->serial && !strcmp(udev->serial, "000001000000")) {
+ if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial &&
+ (!strcmp(udev->serial, "000001000000") || !strcmp(udev->serial, "000002000000"))) {
dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation");
set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 1b9951d2067e..d668682f91df 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -316,6 +316,14 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
old_value = *dbbuf_db;
*dbbuf_db = value;
+ /*
+ * Ensure that the doorbell is updated before reading the event
+ * index from memory. The controller needs to provide similar
+ * ordering to ensure the envent index is updated before reading
+ * the doorbell.
+ */
+ mb();
+
if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
return false;
}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ebf3e7a6c49e..b5ec96abd048 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1210,7 +1210,7 @@ static int __init nvmet_init(void)
error = nvmet_init_discovery();
if (error)
- goto out;
+ goto out_free_work_queue;
error = nvmet_init_configfs();
if (error)
@@ -1219,6 +1219,8 @@ static int __init nvmet_init(void)
out_exit_discovery:
nvmet_exit_discovery();
+out_free_work_queue:
+ destroy_workqueue(buffered_io_wq);
out:
return error;
}
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 34712def81b1..5251689a1d9a 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -311,7 +311,7 @@ fcloop_tgt_lsrqst_done_work(struct work_struct *work)
struct fcloop_tport *tport = tls_req->tport;
struct nvmefc_ls_req *lsreq = tls_req->lsreq;
- if (tport->remoteport)
+ if (!tport || tport->remoteport)
lsreq->done(lsreq, tls_req->status);
}
@@ -329,6 +329,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
if (!rport->targetport) {
tls_req->status = -ECONNREFUSED;
+ tls_req->tport = NULL;
schedule_work(&tls_req->work);
return ret;
}
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 466e3c8582f0..9095b8290150 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -54,6 +54,28 @@ DEFINE_MUTEX(of_mutex);
*/
DEFINE_RAW_SPINLOCK(devtree_lock);
+bool of_node_name_eq(const struct device_node *np, const char *name)
+{
+ const char *node_name;
+ size_t len;
+
+ if (!np)
+ return false;
+
+ node_name = kbasename(np->full_name);
+ len = strchrnul(node_name, '@') - node_name;
+
+ return (strlen(name) == len) && (strncmp(node_name, name, len) == 0);
+}
+
+bool of_node_name_prefix(const struct device_node *np, const char *prefix)
+{
+ if (!np)
+ return false;
+
+ return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0;
+}
+
int of_n_addr_cells(struct device_node *np)
{
u32 cells;
@@ -720,6 +742,31 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
EXPORT_SYMBOL(of_get_next_available_child);
/**
+ * of_get_compatible_child - Find compatible child node
+ * @parent: parent node
+ * @compatible: compatible string
+ *
+ * Lookup child node whose compatible property contains the given compatible
+ * string.
+ *
+ * Returns a node pointer with refcount incremented, use of_node_put() on it
+ * when done; or NULL if not found.
+ */
+struct device_node *of_get_compatible_child(const struct device_node *parent,
+ const char *compatible)
+{
+ struct device_node *child;
+
+ for_each_child_of_node(parent, child) {
+ if (of_device_is_compatible(child, compatible))
+ break;
+ }
+
+ return child;
+}
+EXPORT_SYMBOL(of_get_compatible_child);
+
+/**
* of_get_child_by_name - Find the child node by name for a given parent
* @node: parent node
* @name: child name to look for.
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 977a8307fbb1..4f2816559205 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -260,10 +260,13 @@ static int of_thermal_set_mode(struct thermal_zone_device *tz,
mutex_lock(&tz->lock);
- if (mode == THERMAL_DEVICE_ENABLED)
+ if (mode == THERMAL_DEVICE_ENABLED) {
tz->polling_delay = data->polling_delay;
- else
+ tz->passive_delay = data->passive_delay;
+ } else {
tz->polling_delay = 0;
+ tz->passive_delay = 0;
+ }
mutex_unlock(&tz->lock);
diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c
index c866cc165960..450ed66edf58 100644
--- a/drivers/thermal/qoriq_thermal.c
+++ b/drivers/thermal/qoriq_thermal.c
@@ -1,16 +1,6 @@
-/*
- * Copyright 2016 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright 2016 Freescale Semiconductor, Inc.
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -197,7 +187,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
int ret;
struct qoriq_tmu_data *data;
struct device_node *np = pdev->dev.of_node;
- u32 site = 0;
+ u32 site;
if (!np) {
dev_err(&pdev->dev, "Device OF-Node is NULL");
@@ -233,8 +223,9 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
if (ret < 0)
goto err_tmu;
- data->tz = thermal_zone_of_sensor_register(&pdev->dev, data->sensor_id,
- data, &tmu_tz_ops);
+ data->tz = devm_thermal_zone_of_sensor_register(&pdev->dev,
+ data->sensor_id,
+ data, &tmu_tz_ops);
if (IS_ERR(data->tz)) {
ret = PTR_ERR(data->tz);
dev_err(&pdev->dev,
@@ -243,7 +234,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
}
/* Enable monitoring */
- site |= 0x1 << (15 - data->sensor_id);
+ site = 0x1 << (15 - data->sensor_id);
tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr);
return 0;
@@ -261,8 +252,6 @@ static int qoriq_tmu_remove(struct platform_device *pdev)
{
struct qoriq_tmu_data *data = platform_get_drvdata(pdev);
- thermal_zone_of_sensor_unregister(&pdev->dev, data->tz);
-
/* Disable monitoring */
tmu_write(data, TMR_DISABLE, &data->regs->tmr);
diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index 766521eb7071..7aed5337bdd3 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* R-Car Gen3 THS thermal sensor driver
* Based on rcar_thermal.c and work from Hien Dang and Khiem Nguyen.
*
* Copyright (C) 2016 Renesas Electronics Corporation.
* Copyright (C) 2016 Sang Engineering
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
*/
#include <linux/delay.h>
#include <linux/err.h>
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index e77e63070e99..78f932822d38 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* R-Car THS/TSC thermal sensor driver
*
* Copyright (C) 2012 Renesas Solutions Corp.
* Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
#include <linux/delay.h>
#include <linux/err.h>
@@ -660,6 +648,6 @@ static struct platform_driver rcar_thermal_driver = {
};
module_platform_driver(rcar_thermal_driver);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("R-Car THS/TSC thermal sensor driver");
MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 96c1d8400822..b13c6b4b2c66 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -952,7 +952,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
list_for_each_entry_safe(node, n, &d->pending_list, node) {
struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb;
if (msg->iova <= vq_msg->iova &&
- msg->iova + msg->size - 1 > vq_msg->iova &&
+ msg->iova + msg->size - 1 >= vq_msg->iova &&
vq_msg->type == VHOST_IOTLB_MISS) {
vhost_poll_queue(&node->vq->poll);
list_del(&node->node);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index f2088838f690..5b471889d723 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -402,10 +402,19 @@ static ssize_t modalias_show(struct device *dev,
}
static DEVICE_ATTR_RO(modalias);
+static ssize_t state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n",
+ xenbus_strstate(to_xenbus_device(dev)->state));
+}
+static DEVICE_ATTR_RO(state);
+
static struct attribute *xenbus_dev_attrs[] = {
&dev_attr_nodename.attr,
&dev_attr_devtype.attr,
&dev_attr_modalias.attr,
+ &dev_attr_state.attr,
NULL,
};
diff --git a/fs/buffer.c b/fs/buffer.c
index 4cc679d5bf58..6f1ae3ac9789 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -39,7 +39,6 @@
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
-#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ec3fba7d492f..488a9e7f8f66 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -24,6 +24,7 @@
#include <linux/mpage.h>
#include <linux/user_namespace.h>
#include <linux/seq_file.h>
+#include <linux/blkdev.h>
#include "isofs.h"
#include "zisofs.h"
@@ -653,6 +654,12 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
/*
* What if bugger tells us to go beyond page size?
*/
+ if (bdev_logical_block_size(s->s_bdev) > 2048) {
+ printk(KERN_WARNING
+ "ISOFS: unsupported/invalid hardware sector size %d\n",
+ bdev_logical_block_size(s->s_bdev));
+ goto out_freesbi;
+ }
opt.blocksize = sb_min_blocksize(s, opt.blocksize);
sbi->s_high_sierra = 0; /* default is iso9660 */
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 05506d60131c..59cdb27826de 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -132,13 +132,13 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
struct fsnotify_mark *mark;
assert_spin_locked(&conn->lock);
+ /* We can get detached connector here when inode is getting unlinked. */
+ if (!fsnotify_valid_obj_type(conn->type))
+ return;
hlist_for_each_entry(mark, &conn->list, obj_list) {
if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
new_mask |= mark->mask;
}
- if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
- return;
-
*fsnotify_conn_mask_p(conn) = new_mask;
}
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 860bfbe7a07a..f0cbf58ad4da 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,7 @@
#include <linux/quotaops.h>
#include <linux/types.h>
#include <linux/writeback.h>
+#include <linux/nospec.h>
static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
qid_t id)
@@ -120,8 +121,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
struct if_dqinfo uinfo;
int ret;
- /* This checks whether qc_state has enough entries... */
- BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS);
if (!sb->s_qcop->get_state)
return -ENOSYS;
ret = sb->s_qcop->get_state(sb, &state);
@@ -354,10 +353,10 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
* GETXSTATE quotactl has space for just one set of time limits so
* report them for the first enabled quota type
*/
- for (type = 0; type < XQM_MAXQUOTAS; type++)
+ for (type = 0; type < MAXQUOTAS; type++)
if (state.s_state[type].flags & QCI_ACCT_ENABLED)
break;
- BUG_ON(type == XQM_MAXQUOTAS);
+ BUG_ON(type == MAXQUOTAS);
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -427,10 +426,10 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
* GETXSTATV quotactl has space for just one set of time limits so
* report them for the first enabled quota type
*/
- for (type = 0; type < XQM_MAXQUOTAS; type++)
+ for (type = 0; type < MAXQUOTAS; type++)
if (state.s_state[type].flags & QCI_ACCT_ENABLED)
break;
- BUG_ON(type == XQM_MAXQUOTAS);
+ BUG_ON(type == MAXQUOTAS);
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -701,8 +700,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
{
int ret;
- if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS))
+ if (type >= MAXQUOTAS)
return -EINVAL;
+ type = array_index_nospec(type, MAXQUOTAS);
/*
* Quota not supported on this fs? Check this before s_quota_types
* since they needn't be set if quota is not supported at all.
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3040dc2a32f6..6f515651a2c2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -764,9 +764,7 @@ static int udf_find_fileset(struct super_block *sb,
struct kernel_lb_addr *root)
{
struct buffer_head *bh = NULL;
- long lastblock;
uint16_t ident;
- struct udf_sb_info *sbi;
if (fileset->logicalBlockNum != 0xFFFFFFFF ||
fileset->partitionReferenceNum != 0xFFFF) {
@@ -779,69 +777,11 @@ static int udf_find_fileset(struct super_block *sb,
return 1;
}
- }
-
- sbi = UDF_SB(sb);
- if (!bh) {
- /* Search backwards through the partitions */
- struct kernel_lb_addr newfileset;
-
-/* --> cvg: FIXME - is it reasonable? */
- return 1;
-
- for (newfileset.partitionReferenceNum = sbi->s_partitions - 1;
- (newfileset.partitionReferenceNum != 0xFFFF &&
- fileset->logicalBlockNum == 0xFFFFFFFF &&
- fileset->partitionReferenceNum == 0xFFFF);
- newfileset.partitionReferenceNum--) {
- lastblock = sbi->s_partmaps
- [newfileset.partitionReferenceNum]
- .s_partition_len;
- newfileset.logicalBlockNum = 0;
-
- do {
- bh = udf_read_ptagged(sb, &newfileset, 0,
- &ident);
- if (!bh) {
- newfileset.logicalBlockNum++;
- continue;
- }
-
- switch (ident) {
- case TAG_IDENT_SBD:
- {
- struct spaceBitmapDesc *sp;
- sp = (struct spaceBitmapDesc *)
- bh->b_data;
- newfileset.logicalBlockNum += 1 +
- ((le32_to_cpu(sp->numOfBytes) +
- sizeof(struct spaceBitmapDesc)
- - 1) >> sb->s_blocksize_bits);
- brelse(bh);
- break;
- }
- case TAG_IDENT_FSD:
- *fileset = newfileset;
- break;
- default:
- newfileset.logicalBlockNum++;
- brelse(bh);
- bh = NULL;
- break;
- }
- } while (newfileset.logicalBlockNum < lastblock &&
- fileset->logicalBlockNum == 0xFFFFFFFF &&
- fileset->partitionReferenceNum == 0xFFFF);
- }
- }
-
- if ((fileset->logicalBlockNum != 0xFFFFFFFF ||
- fileset->partitionReferenceNum != 0xFFFF) && bh) {
udf_debug("Fileset at block=%u, partition=%u\n",
fileset->logicalBlockNum,
fileset->partitionReferenceNum);
- sbi->s_partition = fileset->partitionReferenceNum;
+ UDF_SB(sb)->s_partition = fileset->partitionReferenceNum;
udf_load_fileset(sb, bh, root);
brelse(bh);
return 0;
@@ -1570,10 +1510,16 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
*/
#define PART_DESC_ALLOC_STEP 32
+struct part_desc_seq_scan_data {
+ struct udf_vds_record rec;
+ u32 partnum;
+};
+
struct desc_seq_scan_data {
struct udf_vds_record vds[VDS_POS_LENGTH];
unsigned int size_part_descs;
- struct udf_vds_record *part_descs_loc;
+ unsigned int num_part_descs;
+ struct part_desc_seq_scan_data *part_descs_loc;
};
static struct udf_vds_record *handle_partition_descriptor(
@@ -1582,10 +1528,14 @@ static struct udf_vds_record *handle_partition_descriptor(
{
struct partitionDesc *desc = (struct partitionDesc *)bh->b_data;
int partnum;
+ int i;
partnum = le16_to_cpu(desc->partitionNumber);
- if (partnum >= data->size_part_descs) {
- struct udf_vds_record *new_loc;
+ for (i = 0; i < data->num_part_descs; i++)
+ if (partnum == data->part_descs_loc[i].partnum)
+ return &(data->part_descs_loc[i].rec);
+ if (data->num_part_descs >= data->size_part_descs) {
+ struct part_desc_seq_scan_data *new_loc;
unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP);
new_loc = kcalloc(new_size, sizeof(*new_loc), GFP_KERNEL);
@@ -1597,7 +1547,7 @@ static struct udf_vds_record *handle_partition_descriptor(
data->part_descs_loc = new_loc;
data->size_part_descs = new_size;
}
- return &(data->part_descs_loc[partnum]);
+ return &(data->part_descs_loc[data->num_part_descs++].rec);
}
@@ -1647,6 +1597,7 @@ static noinline int udf_process_sequence(
memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
data.size_part_descs = PART_DESC_ALLOC_STEP;
+ data.num_part_descs = 0;
data.part_descs_loc = kcalloc(data.size_part_descs,
sizeof(*data.part_descs_loc),
GFP_KERNEL);
@@ -1658,7 +1609,6 @@ static noinline int udf_process_sequence(
* are in it.
*/
for (; (!done && block <= lastblock); block++) {
-
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
break;
@@ -1730,13 +1680,10 @@ static noinline int udf_process_sequence(
}
/* Now handle prevailing Partition Descriptors */
- for (i = 0; i < data.size_part_descs; i++) {
- if (data.part_descs_loc[i].block) {
- ret = udf_load_partdesc(sb,
- data.part_descs_loc[i].block);
- if (ret < 0)
- return ret;
- }
+ for (i = 0; i < data.num_part_descs; i++) {
+ ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block);
+ if (ret < 0)
+ return ret;
}
return 0;
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index ca1d2cc2cdfa..18863d56273c 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -199,47 +199,57 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
#define __declare_arg_0(a0, res) \
struct arm_smccc_res *___res = res; \
- register u32 r0 asm("r0") = a0; \
+ register unsigned long r0 asm("r0") = (u32)a0; \
register unsigned long r1 asm("r1"); \
register unsigned long r2 asm("r2"); \
register unsigned long r3 asm("r3")
#define __declare_arg_1(a0, a1, res) \
+ typeof(a1) __a1 = a1; \
struct arm_smccc_res *___res = res; \
- register u32 r0 asm("r0") = a0; \
- register typeof(a1) r1 asm("r1") = a1; \
+ register unsigned long r0 asm("r0") = (u32)a0; \
+ register unsigned long r1 asm("r1") = __a1; \
register unsigned long r2 asm("r2"); \
register unsigned long r3 asm("r3")
#define __declare_arg_2(a0, a1, a2, res) \
+ typeof(a1) __a1 = a1; \
+ typeof(a2) __a2 = a2; \
struct arm_smccc_res *___res = res; \
- register u32 r0 asm("r0") = a0; \
- register typeof(a1) r1 asm("r1") = a1; \
- register typeof(a2) r2 asm("r2") = a2; \
+ register unsigned long r0 asm("r0") = (u32)a0; \
+ register unsigned long r1 asm("r1") = __a1; \
+ register unsigned long r2 asm("r2") = __a2; \
register unsigned long r3 asm("r3")
#define __declare_arg_3(a0, a1, a2, a3, res) \
+ typeof(a1) __a1 = a1; \
+ typeof(a2) __a2 = a2; \
+ typeof(a3) __a3 = a3; \
struct arm_smccc_res *___res = res; \
- register u32 r0 asm("r0") = a0; \
- register typeof(a1) r1 asm("r1") = a1; \
- register typeof(a2) r2 asm("r2") = a2; \
- register typeof(a3) r3 asm("r3") = a3
+ register unsigned long r0 asm("r0") = (u32)a0; \
+ register unsigned long r1 asm("r1") = __a1; \
+ register unsigned long r2 asm("r2") = __a2; \
+ register unsigned long r3 asm("r3") = __a3
#define __declare_arg_4(a0, a1, a2, a3, a4, res) \
+ typeof(a4) __a4 = a4; \
__declare_arg_3(a0, a1, a2, a3, res); \
- register typeof(a4) r4 asm("r4") = a4
+ register unsigned long r4 asm("r4") = __a4
#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \
+ typeof(a5) __a5 = a5; \
__declare_arg_4(a0, a1, a2, a3, a4, res); \
- register typeof(a5) r5 asm("r5") = a5
+ register unsigned long r5 asm("r5") = __a5
#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \
+ typeof(a6) __a6 = a6; \
__declare_arg_5(a0, a1, a2, a3, a4, a5, res); \
- register typeof(a6) r6 asm("r6") = a6
+ register unsigned long r6 asm("r6") = __a6
#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \
+ typeof(a7) __a7 = a7; \
__declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \
- register typeof(a7) r7 asm("r7") = a7
+ register unsigned long r7 asm("r7") = __a7
#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__)
#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__)
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b79387fd57da..65b4eaed1d96 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -855,7 +855,7 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg)
}
u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold);
-void i2c_release_dma_safe_msg_buf(struct i2c_msg *msg, u8 *buf);
+void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred);
int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr);
/**
diff --git a/include/linux/of.h b/include/linux/of.h
index 4d25e4f952d9..99b0ebf49632 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -256,6 +256,9 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
+extern bool of_node_name_eq(const struct device_node *np, const char *name);
+extern bool of_node_name_prefix(const struct device_node *np, const char *prefix);
+
static inline const char *of_node_full_name(const struct device_node *np)
{
return np ? np->full_name : "<no-node>";
@@ -290,6 +293,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
extern struct device_node *of_get_next_available_child(
const struct device_node *node, struct device_node *prev);
+extern struct device_node *of_get_compatible_child(const struct device_node *parent,
+ const char *compatible);
extern struct device_node *of_get_child_by_name(const struct device_node *node,
const char *name);
@@ -561,6 +566,16 @@ static inline struct device_node *to_of_node(const struct fwnode_handle *fwnode)
return NULL;
}
+static inline bool of_node_name_eq(const struct device_node *np, const char *name)
+{
+ return false;
+}
+
+static inline bool of_node_name_prefix(const struct device_node *np, const char *prefix)
+{
+ return false;
+}
+
static inline const char* of_node_full_name(const struct device_node *np)
{
return "<no-node>";
@@ -632,6 +647,12 @@ static inline bool of_have_populated_dt(void)
return false;
}
+static inline struct device_node *of_get_compatible_child(const struct device_node *parent,
+ const char *compatible)
+{
+ return NULL;
+}
+
static inline struct device_node *of_get_child_by_name(
const struct device_node *node,
const char *name)
@@ -967,6 +988,18 @@ static inline struct device_node *of_find_matching_node(
return of_find_matching_node_and_match(from, matches, NULL);
}
+static inline const char *of_node_get_device_type(const struct device_node *np)
+{
+ return of_get_property(np, "type", NULL);
+}
+
+static inline bool of_node_is_type(const struct device_node *np, const char *type)
+{
+ const char *match = of_node_get_device_type(np);
+
+ return np && match && type && !strcmp(match, type);
+}
+
/**
* of_property_count_u8_elems - Count the number of u8 elements in a property
*
diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h
index 9abc0ca7259b..9f0aa1b48c78 100644
--- a/include/linux/platform_data/ina2xx.h
+++ b/include/linux/platform_data/ina2xx.h
@@ -1,7 +1,7 @@
/*
* Driver for Texas Instruments INA219, INA226 power monitor chips
*
- * Copyright (C) 2012 Lothar Felten <l-felten@ti.com>
+ * Copyright (C) 2012 Lothar Felten <lothar.felten@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index df4d13f7e191..d15f8e4815e3 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -39,15 +39,6 @@
#include <linux/qed/qed_ll2_if.h>
#include <linux/qed/rdma_common.h>
-enum qed_roce_ll2_tx_dest {
- /* Light L2 TX Destination to the Network */
- QED_ROCE_LL2_TX_DEST_NW,
-
- /* Light L2 TX Destination to the Loopback */
- QED_ROCE_LL2_TX_DEST_LB,
- QED_ROCE_LL2_TX_DEST_MAX
-};
-
#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF)
/* rdma interface */
@@ -581,7 +572,7 @@ struct qed_roce_ll2_packet {
int n_seg;
struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE];
int roce_mode;
- enum qed_roce_ll2_tx_dest tx_dest;
+ enum qed_ll2_tx_dest tx_dest;
};
enum qed_rdma_type {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index ca9772c8e48b..f32dd270b8e3 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -408,13 +408,7 @@ struct qc_type_state {
struct qc_state {
unsigned int s_incoredqs; /* Number of dquots in core */
- /*
- * Per quota type information. The array should really have
- * max(MAXQUOTAS, XQM_MAXQUOTAS) entries. BUILD_BUG_ON in
- * quota_getinfo() makes sure XQM_MAXQUOTAS is large enough. Once VFS
- * supports project quotas, this can be changed to MAXQUOTAS
- */
- struct qc_type_state s_state[XQM_MAXQUOTAS];
+ struct qc_type_state s_state[MAXQUOTAS]; /* Per quota type information */
};
/* Structure for communicating via ->set_info */
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 1ad5b19e83a9..970303448c90 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -23,13 +23,11 @@ struct tc_action {
const struct tc_action_ops *ops;
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
__u32 order;
- struct list_head list;
struct tcf_idrinfo *idrinfo;
u32 tcfa_index;
refcount_t tcfa_refcnt;
atomic_t tcfa_bindcnt;
- u32 tcfa_capab;
int tcfa_action;
struct tcf_t tcfa_tm;
struct gnet_stats_basic_packed tcfa_bstats;
@@ -44,7 +42,6 @@ struct tc_action {
#define tcf_index common.tcfa_index
#define tcf_refcnt common.tcfa_refcnt
#define tcf_bindcnt common.tcfa_bindcnt
-#define tcf_capab common.tcfa_capab
#define tcf_action common.tcfa_action
#define tcf_tm common.tcfa_tm
#define tcf_bstats common.tcfa_bstats
@@ -102,7 +99,6 @@ struct tc_action_ops {
size_t (*get_fill_size)(const struct tc_action *act);
struct net_device *(*get_dev)(const struct tc_action *a);
void (*put_dev)(struct net_device *dev);
- int (*delete)(struct net *net, u32 index);
};
struct tc_action_net {
@@ -148,8 +144,6 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
const struct tc_action_ops *ops,
struct netlink_ext_ack *extack);
int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
-bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
- int bind);
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats);
@@ -158,7 +152,6 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
-int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
static inline int tcf_idr_release(struct tc_action *a, bool bind)
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ef727f71336e..75a3f3fdb359 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -298,19 +298,13 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
#endif
}
-static inline void tcf_exts_to_list(const struct tcf_exts *exts,
- struct list_head *actions)
-{
#ifdef CONFIG_NET_CLS_ACT
- int i;
-
- for (i = 0; i < exts->nr_actions; i++) {
- struct tc_action *a = exts->actions[i];
-
- list_add_tail(&a->list, actions);
- }
+#define tcf_exts_for_each_action(i, a, exts) \
+ for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = (exts)->actions[i]); i++)
+#else
+#define tcf_exts_for_each_action(i, a, exts) \
+ for (; 0; (void)(i), (void)(a), (void)(exts))
#endif
-}
static inline void
tcf_exts_stats_update(const struct tcf_exts *exts,
@@ -361,6 +355,15 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts)
#endif
}
+static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ return exts->actions[0];
+#else
+ return NULL;
+#endif
+}
+
/**
* tcf_exts_exec - execute tc filter extensions
* @skb: socket buffer
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 77c7908b7d73..2734c895c1bf 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -46,7 +46,6 @@
#include <net/ip.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
-#include <net/ipv6.h>
#include <net/net_namespace.h>
/**
@@ -95,20 +94,18 @@ int rdma_translate_ip(const struct sockaddr *addr,
* @timeout_ms: Amount of time to wait for the address resolution to complete.
* @callback: Call invoked once address resolution has completed, timed out,
* or been canceled. A status of 0 indicates success.
+ * @resolve_by_gid_attr: Resolve the ip based on the GID attribute from
+ * rdma_dev_addr.
* @context: User-specified context associated with the call.
*/
int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr, int timeout_ms,
+ struct rdma_dev_addr *addr, unsigned long timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
- void *context);
+ bool resolve_by_gid_attr, void *context);
void rdma_addr_cancel(struct rdma_dev_addr *addr);
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
- const struct net_device *dev,
- const unsigned char *dst_dev_addr);
-
int rdma_addr_size(const struct sockaddr *addr);
int rdma_addr_size_in6(struct sockaddr_in6 *addr);
int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index c10f4b5ea8ab..49f4f75499b3 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -583,7 +583,7 @@ struct ib_cm_sidr_req_param {
struct sa_path_rec *path;
const struct ib_gid_attr *sgid_attr;
__be64 service_id;
- int timeout_ms;
+ unsigned long timeout_ms;
const void *private_data;
u8 private_data_len;
u8 max_cm_retries;
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index b6ddf2a1b9d8..19520979b84c 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -449,28 +449,23 @@ struct ib_sa_query;
void ib_sa_cancel_query(int id, struct ib_sa_query *query);
-int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct sa_path_rec *resp,
+int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
+ u8 port_num, struct sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+ gfp_t gfp_mask,
+ void (*callback)(int status, struct sa_path_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **query);
+ void *context, struct ib_sa_query **query);
int ib_sa_service_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- u8 method,
- struct ib_sa_service_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_service_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ struct ib_device *device, u8 port_num, u8 method,
+ struct ib_sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+ gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_service_rec *resp,
+ void *context),
+ void *context, struct ib_sa_query **sa_query);
struct ib_sa_multicast {
struct ib_sa_mcmember_rec rec;
@@ -573,12 +568,11 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ void *context, struct ib_sa_query **sa_query);
bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
struct ib_device *device,
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a1fd63871d17..5d3755ec5afa 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,15 +42,14 @@ struct ib_umem_odp;
struct ib_umem {
struct ib_ucontext *context;
+ struct mm_struct *owning_mm;
size_t length;
unsigned long address;
int page_shift;
- int writable;
- int hugetlb;
+ u32 writable : 1;
+ u32 hugetlb : 1;
+ u32 is_odp : 1;
struct work_struct work;
- struct mm_struct *mm;
- unsigned long diff;
- struct ib_umem_odp *odp_data;
struct sg_table sg_head;
int nmap;
int npages;
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 381cdf5a9bd1..0b1446fe2fab 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -43,6 +43,9 @@ struct umem_odp_node {
};
struct ib_umem_odp {
+ struct ib_umem umem;
+ struct ib_ucontext_per_mm *per_mm;
+
/*
* An array of the pages included in the on-demand paging umem.
* Indices of pages that are currently not mapped into the device will
@@ -64,16 +67,9 @@ struct ib_umem_odp {
struct mutex umem_mutex;
void *private; /* for the HW driver to use. */
- /* When false, use the notifier counter in the ucontext struct. */
- bool mn_counters_active;
int notifiers_seq;
int notifiers_count;
- /* A linked list of umems that don't have private mmu notifier
- * counters yet. */
- struct list_head no_private_counters;
- struct ib_umem *umem;
-
/* Tree tracking */
struct umem_odp_node interval_tree;
@@ -82,15 +78,34 @@ struct ib_umem_odp {
struct work_struct work;
};
+static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
+{
+ return container_of(umem, struct ib_umem_odp, umem);
+}
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
- int access);
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size);
+struct ib_ucontext_per_mm {
+ struct ib_ucontext *context;
+ struct mm_struct *mm;
+ struct pid *tgid;
+ bool active;
+
+ struct rb_root_cached umem_tree;
+ /* Protects umem_tree */
+ struct rw_semaphore umem_rwsem;
-void ib_umem_odp_release(struct ib_umem *umem);
+ struct mmu_notifier mn;
+ unsigned int odp_mrs_count;
+
+ struct list_head ucontext_list;
+ struct rcu_head rcu;
+};
+
+int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+ unsigned long addr, size_t size);
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
/*
* The lower 2 bits of the DMA address signal the R/W permissions for
@@ -105,13 +120,14 @@ void ib_umem_odp_release(struct ib_umem *umem);
#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
- u64 access_mask, unsigned long current_seq);
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
+ u64 bcnt, u64 access_mask,
+ unsigned long current_seq);
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
-typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
void *cookie);
/*
* Call the callback on each ib_umem in the range. Returns the logical or of
@@ -129,46 +145,37 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
u64 addr, u64 length);
-static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
unsigned long mmu_seq)
{
/*
* This code is strongly based on the KVM code from
* mmu_notifier_retry. Should be called with
- * the relevant locks taken (item->odp_data->umem_mutex
+ * the relevant locks taken (umem_odp->umem_mutex
* and the ucontext umem_mutex semaphore locked for read).
*/
- /* Do not allow page faults while the new ib_umem hasn't seen a state
- * with zero notifiers yet, and doesn't have its own valid set of
- * private counters. */
- if (!item->odp_data->mn_counters_active)
- return 1;
-
- if (unlikely(item->odp_data->notifiers_count))
+ if (unlikely(umem_odp->notifiers_count))
return 1;
- if (item->odp_data->notifiers_seq != mmu_seq)
+ if (umem_odp->notifiers_seq != mmu_seq)
return 1;
return 0;
}
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline int ib_umem_odp_get(struct ib_ucontext *context,
- struct ib_umem *umem,
- int access)
+static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
{
return -EINVAL;
}
-static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size)
+static inline struct ib_umem_odp *
+ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size)
{
return ERR_PTR(-EINVAL);
}
-static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e950c2a68f06..b17eea0373cb 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -69,8 +69,11 @@
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
+struct ib_umem_odp;
+
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
union ib_gid {
u8 raw[16];
@@ -1137,7 +1140,9 @@ enum ib_qp_create_flags {
*/
struct ib_qp_init_attr {
+ /* Consumer's event_handler callback must not block */
void (*event_handler)(struct ib_event *, void *);
+
void *qp_context;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
@@ -1146,7 +1151,7 @@ struct ib_qp_init_attr {
struct ib_qp_cap cap;
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
- enum ib_qp_create_flags create_flags;
+ u32 create_flags;
/*
* Only needed for special QP types, or when using the RW API.
@@ -1278,21 +1283,27 @@ struct ib_qp_attr {
};
enum ib_wr_opcode {
- IB_WR_RDMA_WRITE,
- IB_WR_RDMA_WRITE_WITH_IMM,
- IB_WR_SEND,
- IB_WR_SEND_WITH_IMM,
- IB_WR_RDMA_READ,
- IB_WR_ATOMIC_CMP_AND_SWP,
- IB_WR_ATOMIC_FETCH_AND_ADD,
- IB_WR_LSO,
- IB_WR_SEND_WITH_INV,
- IB_WR_RDMA_READ_WITH_INV,
- IB_WR_LOCAL_INV,
- IB_WR_REG_MR,
- IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
- IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ /* These are shared with userspace */
+ IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
+ IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
+ IB_WR_SEND = IB_UVERBS_WR_SEND,
+ IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
+ IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
+ IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
+ IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_LSO = IB_UVERBS_WR_TSO,
+ IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
+ IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
+ IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
+ IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
+ IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
+ IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
+ IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+
+ /* These are kernel only and can not be issued by userspace */
+ IB_WR_REG_MR = 0x20,
IB_WR_REG_SIG_MR,
+
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
*/
@@ -1485,26 +1496,15 @@ struct ib_ucontext {
* it is set when we are closing the file descriptor and indicates
* that mm_sem may be locked.
*/
- int closing;
+ bool closing;
bool cleanup_retryable;
- struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct rb_root_cached umem_tree;
- /*
- * Protects .umem_rbroot and tree, as well as odp_mrs_count and
- * mmu notifiers registration.
- */
- struct rw_semaphore umem_rwsem;
- void (*invalidate_range)(struct ib_umem *umem,
+ void (*invalidate_range)(struct ib_umem_odp *umem_odp,
unsigned long start, unsigned long end);
-
- struct mmu_notifier mn;
- atomic_t notifier_count;
- /* A list of umems that don't have private mmu notifier counters yet. */
- struct list_head no_private_counters;
- int odp_mrs_count;
+ struct mutex per_mm_list_lock;
+ struct list_head per_mm_list;
#endif
struct ib_rdmacg_object cg_obj;
@@ -1570,9 +1570,10 @@ struct ib_ah {
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
enum ib_poll_context {
- IB_POLL_DIRECT, /* caller context, no hw completions */
- IB_POLL_SOFTIRQ, /* poll from softirq context */
- IB_POLL_WORKQUEUE, /* poll from workqueue */
+ IB_POLL_DIRECT, /* caller context, no hw completions */
+ IB_POLL_SOFTIRQ, /* poll from softirq context */
+ IB_POLL_WORKQUEUE, /* poll from workqueue */
+ IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
};
struct ib_cq {
@@ -1589,6 +1590,7 @@ struct ib_cq {
struct irq_poll iop;
struct work_struct work;
};
+ struct workqueue_struct *comp_wq;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -2253,10 +2255,11 @@ struct ib_device {
struct list_head event_handler_list;
spinlock_t event_handler_lock;
- spinlock_t client_data_lock;
+ rwlock_t client_data_lock;
struct list_head core_list;
/* Access to the client_data_list is protected by the client_data_lock
- * spinlock and the lists_rwsem read-write semaphore */
+ * rwlock and the lists_rwsem read-write semaphore
+ */
struct list_head client_data_list;
struct ib_cache cache;
@@ -2536,7 +2539,13 @@ struct ib_device {
struct module *owner;
struct device dev;
- struct kobject *ports_parent;
+ /* First group for device attributes,
+ * Second group for driver provided attributes (optional).
+ * It is NULL terminated array.
+ */
+ const struct attribute_group *groups[3];
+
+ struct kobject *ports_kobj;
struct list_head port_list;
enum {
@@ -2619,9 +2628,9 @@ void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name,
+ int (*port_callback)(struct ib_device *, u8,
+ struct kobject *));
void ib_unregister_device(struct ib_device *device);
int ib_register_client (struct ib_client *client);
@@ -2631,6 +2640,28 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot);
+int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma, struct page *page,
+ unsigned long size);
+#else
+static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size,
+ pgprot_t prot)
+{
+ return -EINVAL;
+}
+static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma, struct page *page,
+ unsigned long size)
+{
+ return -EINVAL;
+}
+#endif
+
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
@@ -2714,7 +2745,6 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
- * @ll : link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
@@ -2723,8 +2753,7 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
* and that the attribute mask supplied is allowed for the transition.
*/
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll);
+ enum ib_qp_type type, enum ib_qp_attr_mask mask);
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -4153,20 +4182,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
}
-static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
- struct ib_qp *qp, struct ib_device *device)
-{
- uobj->object = ibflow;
- ibflow->uobject = uobj;
-
- if (qp) {
- atomic_inc(&qp->usecnt);
- ibflow->qp = qp;
- }
-
- ibflow->device = device;
-}
-
/**
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
@@ -4179,4 +4194,27 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs);
+
+/**
+ * rdma_set_device_sysfs_group - Set device attributes group to have
+ * driver specific sysfs entries at
+ * for infiniband class.
+ *
+ * @device: device pointer for which attributes to be created
+ * @group: Pointer to group which should be added when device
+ * is registered with sysfs.
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
+ * group per device to have sysfs attributes.
+ *
+ * NOTE: New drivers should not make use of this API; instead new device
+ * parameter should be exposed via netlink command. This API and mechanism
+ * exist only for existing drivers.
+ */
+static inline void
+rdma_set_device_sysfs_group(struct ib_device *dev,
+ const struct attribute_group *group)
+{
+ dev->groups[1] = group;
+}
+
#endif /* IB_VERBS_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5d71a7f51a9f..60987a5903b7 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -152,7 +152,11 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
* @ps: RDMA port space.
* @qp_type: type of queue pair associated with the id.
*
- * The id holds a reference on the network namespace until it is destroyed.
+ * Returns a new rdma_cm_id. The id holds a reference on the network
+ * namespace until it is destroyed.
+ *
+ * The event handler callback serializes on the id's mutex and is
+ * allowed to sleep.
*/
#define rdma_create_id(net, event_handler, context, ps, qp_type) \
__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
@@ -192,7 +196,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
* @timeout_ms: Time to wait for resolution to complete.
*/
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, int timeout_ms);
+ const struct sockaddr *dst_addr,
+ unsigned long timeout_ms);
/**
* rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
@@ -202,7 +207,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
* Users must have first called rdma_resolve_addr to resolve a dst_addr
* into an RDMA address before calling this routine.
*/
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
/**
* rdma_create_qp - Allocate a QP and associate it with the specified RDMA
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c369703fcd69..70218e6b5187 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -96,7 +96,7 @@ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
/**
* Check if there are any listeners to the netlink group
* @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
+ * Returns true on success or false if no listeners.
*/
-int rdma_nl_chk_listeners(unsigned int group);
+bool rdma_nl_chk_listeners(unsigned int group);
#endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a0cf01..3584d0816fcd 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -149,6 +149,10 @@ struct rvt_ibport {
#define RVT_CQN_MAX 16 /* maximum length of cq name */
+#define RVT_SGE_COPY_MEMCPY 0
+#define RVT_SGE_COPY_CACHELESS 1
+#define RVT_SGE_COPY_ADAPTIVE 2
+
/*
* Things that are driver specific, module parameters in hfi1 and qib
*/
@@ -161,6 +165,9 @@ struct rvt_driver_params {
*/
unsigned int lkey_table_size;
unsigned int qp_table_size;
+ unsigned int sge_copy_mode;
+ unsigned int wss_threshold;
+ unsigned int wss_clean_period;
int qpn_start;
int qpn_inc;
int qpn_res_start;
@@ -193,6 +200,19 @@ struct rvt_ah {
u8 log_pmtu;
};
+/* memory working set size */
+struct rvt_wss {
+ unsigned long *entries;
+ atomic_t total_count;
+ atomic_t clean_counter;
+ atomic_t clean_entry;
+
+ int threshold;
+ int num_entries;
+ long pages_mask;
+ unsigned int clean_period;
+};
+
struct rvt_dev_info;
struct rvt_swqe;
struct rvt_driver_provided {
@@ -211,11 +231,18 @@ struct rvt_driver_provided {
* version requires the s_lock not to be held. The other assumes the
* s_lock is held.
*/
- void (*schedule_send)(struct rvt_qp *qp);
- void (*schedule_send_no_lock)(struct rvt_qp *qp);
+ bool (*schedule_send)(struct rvt_qp *qp);
+ bool (*schedule_send_no_lock)(struct rvt_qp *qp);
- /* Driver specific work request checking */
- int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+ /*
+ * Driver specific work request setup and checking.
+ * This function is allowed to perform any setup, checks, or
+ * adjustments required to the SWQE in order to be usable by
+ * underlying protocols. This includes private data structure
+ * allocations.
+ */
+ int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
/*
* Sometimes rdmavt needs to kick the driver's send progress. That is
@@ -371,6 +398,9 @@ struct rvt_dev_info {
/* post send table */
const struct rvt_operation_params *post_parms;
+ /* opcode translation table */
+ const enum ib_wc_opcode *wc_opcode;
+
/* Driver specific helper functions */
struct rvt_driver_provided driver_f;
@@ -411,6 +441,8 @@ struct rvt_dev_info {
u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
spinlock_t n_mcast_grps_lock;
+ /* Memory Working Set Size */
+ struct rvt_wss *wss;
};
/**
@@ -423,7 +455,14 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
const char *fmt, const char *name,
const int unit)
{
- snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit);
+ /*
+ * FIXME: rvt and its users want to touch the ibdev before
+ * registration and have things like the name work. We don't have the
+ * infrastructure in the core to support this directly today, hack it
+ * to work by setting the name manually here.
+ */
+ dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
+ strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
}
/**
@@ -434,7 +473,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
*/
static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
{
- return rdi->ibdev.name;
+ return dev_name(&rdi->ibdev.dev);
}
static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 927f6d5b6d0f..cbafb1878669 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -678,6 +678,13 @@ void rvt_del_timers_sync(struct rvt_qp *qp);
void rvt_stop_rc_timers(struct rvt_qp *qp);
void rvt_add_retry_timer(struct rvt_qp *qp);
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+ void *data, u32 length,
+ bool release, bool copy_last);
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status);
+void rvt_ruc_loopback(struct rvt_qp *qp);
+
/**
* struct rvt_qp_iter - the iterator for QPs
* @qp - the current QP
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 9654d33edd98..2638fa7cd702 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -173,16 +173,10 @@ int rdma_restrack_put(struct rdma_restrack_entry *res);
/**
* rdma_restrack_set_task() - set the task for this resource
* @res: resource entry
- * @task: task struct
+ * @caller: kernel name, the current task will be used if the caller is NULL.
*/
-static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- struct task_struct *task)
-{
- if (res->task)
- put_task_struct(res->task);
- get_task_struct(task);
- res->task = task;
-}
+void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+ const char *caller);
/*
* Helper functions for rdma drivers when filling out
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 9e997c3c2f04..84d3d15f1f38 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -52,6 +52,7 @@ enum uverbs_attr_type {
UVERBS_ATTR_TYPE_IDR,
UVERBS_ATTR_TYPE_FD,
UVERBS_ATTR_TYPE_ENUM_IN,
+ UVERBS_ATTR_TYPE_IDRS_ARRAY,
};
enum uverbs_obj_access {
@@ -101,7 +102,7 @@ struct uverbs_attr_spec {
} enum_def;
} u;
- /* This weird split of the enum lets us remove some padding */
+ /* This weird split lets us remove some padding */
union {
struct {
/*
@@ -111,6 +112,17 @@ struct uverbs_attr_spec {
*/
const struct uverbs_attr_spec *ids;
} enum_def;
+
+ struct {
+ /*
+ * higher bits mean the namespace and lower bits mean
+ * the type id within the namespace.
+ */
+ u16 obj_type;
+ u16 min_len;
+ u16 max_len;
+ u8 access;
+ } objs_arr;
} u2;
};
@@ -251,6 +263,11 @@ static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
return attr_key - 1;
}
+static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
+{
+ return attr_bkey + 1;
+}
+
/*
* =======================================
* Verbs definitions
@@ -323,6 +340,27 @@ struct uverbs_object_tree_def {
#define UA_MANDATORY .mandatory = 1
#define UA_OPTIONAL .mandatory = 0
+/*
+ * min_len must be bigger than 0 and _max_len must be smaller than 4095. Only
+ * READ\WRITE accesses are supported.
+ */
+#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
+ ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = (_attr_id) + \
+ BUILD_BUG_ON_ZERO((_min_len) == 0 || \
+ (_max_len) > \
+ PAGE_SIZE / sizeof(void *) || \
+ (_min_len) > (_max_len) || \
+ (_access) == UVERBS_ACCESS_NEW || \
+ (_access) == UVERBS_ACCESS_DESTROY), \
+ .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \
+ .u2.objs_arr.obj_type = _idr_type, \
+ .u2.objs_arr.access = _access, \
+ .u2.objs_arr.min_len = _min_len, \
+ .u2.objs_arr.max_len = _max_len, \
+ __VA_ARGS__ } })
+
#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
(&(const struct uverbs_attr_def){ \
.id = _attr_id, \
@@ -365,6 +403,15 @@ struct uverbs_object_tree_def {
__VA_ARGS__ }, \
})
+/* An input value that is a member in the enum _enum_type. */
+#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \
+ UVERBS_ATTR_PTR_IN( \
+ _attr_id, \
+ UVERBS_ATTR_SIZE( \
+ sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \
+ sizeof(u64)), \
+ __VA_ARGS__)
+
/*
* An input value that is a bitwise combination of values of _enum_type.
* This permits the flag value to be passed as either a u32 or u64, it must
@@ -431,10 +478,16 @@ struct uverbs_obj_attr {
const struct uverbs_api_attr *attr_elm;
};
+struct uverbs_objs_arr_attr {
+ struct ib_uobject **uobjects;
+ u16 len;
+};
+
struct uverbs_attr {
union {
struct uverbs_ptr_attr ptr_attr;
struct uverbs_obj_attr obj_attr;
+ struct uverbs_objs_arr_attr objs_arr_attr;
};
};
@@ -507,6 +560,31 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
return attr->ptr_attr.len;
}
+/**
+ * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for
+ * UVERBS_ATTR_TYPE_IDRS_ARRAY.
+ * @arr: Returned pointer to array of pointers for uobjects or NULL if
+ * the attribute isn't provided.
+ *
+ * Return: The array length or 0 if no attribute was provided.
+ */
+static inline int uverbs_attr_get_uobjs_arr(
+ const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
+ struct ib_uobject ***arr)
+{
+ const struct uverbs_attr *attr =
+ uverbs_attr_get(attrs_bundle, attr_idx);
+
+ if (IS_ERR(attr)) {
+ *arr = NULL;
+ return 0;
+ }
+
+ *arr = attr->objs_arr_attr.uobjects;
+
+ return attr->objs_arr_attr.len;
+}
+
static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
{
return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
@@ -603,6 +681,9 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
{
return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
}
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val);
#else
static inline int
uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -631,6 +712,34 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
{
return ERR_PTR(-EINVAL);
}
+static inline int
+_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ return -EINVAL;
+}
#endif
+#define uverbs_get_const(_to, _attrs_bundle, _idx) \
+ ({ \
+ s64 _val; \
+ int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), NULL); \
+ (*_to) = _val; \
+ _ret; \
+ })
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \
+ ({ \
+ s64 _val; \
+ s64 _def_val = _default; \
+ int _ret = \
+ _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), &_def_val); \
+ (*_to) = _val; \
+ _ret; \
+ })
#endif
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3b00231cc084..3db2802fbc68 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -140,5 +140,56 @@ __uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
#define uobj_alloc(_type, _ufile, _ib_dev) \
__uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
+static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
+ struct ib_uobject *uobj,
+ struct ib_device *ib_dev,
+ enum ib_flow_action_type type)
+{
+ atomic_set(&action->usecnt, 0);
+ action->device = ib_dev;
+ action->type = type;
+ action->uobject = uobj;
+ uobj->object = action;
+}
+
+struct ib_uflow_resources {
+ size_t max;
+ size_t num;
+ size_t collection_num;
+ size_t counters_num;
+ struct ib_counters **counters;
+ struct ib_flow_action **collection;
+};
+
+struct ib_uflow_object {
+ struct ib_uobject uobject;
+ struct ib_uflow_resources *resources;
+};
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ enum ib_flow_spec_type type,
+ void *ibobj);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+ struct ib_qp *qp, struct ib_device *device,
+ struct ib_uflow_resources *uflow_res)
+{
+ struct ib_uflow_object *uflow;
+
+ uobj->object = ibflow;
+ ibflow->uobject = uobj;
+
+ if (qp) {
+ atomic_inc(&qp->usecnt);
+ ibflow->qp = qp;
+ }
+
+ ibflow->device = device;
+ uflow = container_of(uobj, typeof(*uflow), uobject);
+ uflow->resources = uflow_res;
+}
+
#endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 25a16760de2a..1254b51a551a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -763,10 +763,28 @@ struct ib_uverbs_sge {
__u32 lkey;
};
+enum ib_uverbs_wr_opcode {
+ IB_UVERBS_WR_RDMA_WRITE = 0,
+ IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1,
+ IB_UVERBS_WR_SEND = 2,
+ IB_UVERBS_WR_SEND_WITH_IMM = 3,
+ IB_UVERBS_WR_RDMA_READ = 4,
+ IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5,
+ IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6,
+ IB_UVERBS_WR_LOCAL_INV = 7,
+ IB_UVERBS_WR_BIND_MW = 8,
+ IB_UVERBS_WR_SEND_WITH_INV = 9,
+ IB_UVERBS_WR_TSO = 10,
+ IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
+ IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
+ IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+ /* Review enum ib_wr_opcode before modifying this */
+};
+
struct ib_uverbs_send_wr {
__aligned_u64 wr_id;
__u32 num_sge;
- __u32 opcode;
+ __u32 opcode; /* see enum ib_uverbs_wr_opcode */
__u32 send_flags;
union {
__be32 imm_data;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index addbb9c4529e..6056625237cf 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -45,6 +45,8 @@ enum {
MLX5_QP_FLAG_BFREG_INDEX = 1 << 3,
MLX5_QP_FLAG_TYPE_DCT = 1 << 4,
MLX5_QP_FLAG_TYPE_DCI = 1 << 5,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
};
enum {
@@ -349,9 +351,22 @@ struct mlx5_ib_create_qp_rss {
__u32 flags;
};
+enum mlx5_ib_create_qp_resp_mask {
+ MLX5_IB_CREATE_QP_RESP_MASK_TIRN = 1UL << 0,
+ MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1,
+ MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2,
+ MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3,
+};
+
struct mlx5_ib_create_qp_resp {
__u32 bfreg_index;
__u32 reserved;
+ __u32 comp_mask;
+ __u32 tirn;
+ __u32 tisn;
+ __u32 rqn;
+ __u32 sqn;
+ __u32 reserved1;
};
struct mlx5_ib_alloc_mw {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 9c51801b9e64..408e220034de 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -125,6 +125,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
};
enum mlx5_ib_flow_matcher_destroy_attrs {
@@ -155,6 +156,8 @@ enum mlx5_ib_create_flow_attrs {
MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG,
};
enum mlx5_ib_destoy_flow_attrs {
@@ -166,4 +169,22 @@ enum mlx5_ib_flow_methods {
MLX5_IB_METHOD_DESTROY_FLOW,
};
+enum mlx5_ib_flow_action_methods {
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+};
+
+enum mlx5_ib_create_flow_action_create_modify_header_attrs {
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+};
+
+enum mlx5_ib_create_flow_action_create_packet_reformat_attrs {
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+};
+
#endif
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 8a2fb33f3ed4..4ef62c0e8452 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -39,5 +39,17 @@ enum mlx5_ib_uapi_flow_action_flags {
MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0,
};
+enum mlx5_ib_uapi_flow_table_type {
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1,
+};
+
+enum mlx5_ib_uapi_flow_action_packet_reformat_type {
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0,
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1,
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2,
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
+};
+
#endif
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index edba6351ac13..f9c41bf59efc 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -227,8 +227,9 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_UNSPEC,
RDMA_NLDEV_CMD_GET, /* can dump */
+ RDMA_NLDEV_CMD_SET,
- /* 2 - 4 are free to use */
+ /* 3 - 4 are free to use */
RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 24800c6c1f32..06c34d99be85 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -53,7 +53,7 @@ enum {
struct ib_uverbs_attr {
__u16 attr_id; /* command specific type attribute */
- __u16 len; /* only for pointers */
+ __u16 len; /* only for pointers and IDRs array */
__u16 flags; /* combination of UVERBS_ATTR_F_XXXX */
union {
struct {
@@ -63,7 +63,10 @@ struct ib_uverbs_attr {
__u16 reserved;
} attr_data;
union {
- /* Used by PTR_IN/OUT, ENUM_IN and IDR */
+ /*
+ * ptr to command, inline data, idr/fd or
+ * ptr to __u32 array of IDRs
+ */
__aligned_u64 data;
/* Used by FD_IN and FD_OUT */
__s64 data_s64;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 04b8eda94e7d..03cc59ee9c95 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -15,6 +15,7 @@
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
+#include <linux/random.h>
#include <uapi/linux/btf.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
@@ -41,6 +42,7 @@ struct bpf_htab {
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets; /* number of hash buckets */
u32 elem_size; /* size of each element in bytes */
+ u32 hashrnd;
};
/* each htab element is struct htab_elem + key + value */
@@ -371,6 +373,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (!htab->buckets)
goto free_htab;
+ htab->hashrnd = get_random_int();
for (i = 0; i < htab->n_buckets; i++) {
INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
@@ -402,9 +405,9 @@ free_htab:
return ERR_PTR(err);
}
-static inline u32 htab_map_hash(const void *key, u32 key_len)
+static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
- return jhash(key, key_len, 0);
+ return jhash(key, key_len, hashrnd);
}
static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
@@ -470,7 +473,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
head = select_bucket(htab, hash);
@@ -597,7 +600,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
if (!key)
goto find_first_elem;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
head = select_bucket(htab, hash);
@@ -824,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -880,7 +883,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -945,7 +948,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -998,7 +1001,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -1071,7 +1074,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -1103,7 +1106,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 98e621a29e8e..cf5195c7c331 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1427,12 +1427,15 @@ out:
static void smap_write_space(struct sock *sk)
{
struct smap_psock *psock;
+ void (*write_space)(struct sock *sk);
rcu_read_lock();
psock = smap_psock_sk(sk);
if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state)))
schedule_work(&psock->tx_work);
+ write_space = psock->save_write_space;
rcu_read_unlock();
+ write_space(sk);
}
static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
@@ -2140,7 +2143,9 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
return ERR_PTR(-EPERM);
/* check sanity of attributes */
- if (attr->max_entries == 0 || attr->value_size != 4 ||
+ if (attr->max_entries == 0 ||
+ attr->key_size == 0 ||
+ attr->value_size != 4 ||
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
@@ -2267,8 +2272,10 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
}
l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
htab->map.numa_node);
- if (!l_new)
+ if (!l_new) {
+ atomic_dec(&htab->count);
return ERR_PTR(-ENOMEM);
+ }
memcpy(l_new->key, key, key_size);
l_new->sk = sk;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ed44d7d34c2d..aa7fe85ad62e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -102,8 +102,6 @@ static inline void cpuhp_lock_release(bool bringup) { }
* @name: Name of the step
* @startup: Startup function of the step
* @teardown: Teardown function of the step
- * @skip_onerr: Do not invoke the functions on error rollback
- * Will go away once the notifiers are gone
* @cant_stop: Bringup/teardown can't be stopped at this step
*/
struct cpuhp_step {
@@ -119,7 +117,6 @@ struct cpuhp_step {
struct hlist_node *node);
} teardown;
struct hlist_head list;
- bool skip_onerr;
bool cant_stop;
bool multi_instance;
};
@@ -550,12 +547,8 @@ static int bringup_cpu(unsigned int cpu)
static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
- for (st->state--; st->state > st->target; st->state--) {
- struct cpuhp_step *step = cpuhp_get_step(st->state);
-
- if (!step->skip_onerr)
- cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
- }
+ for (st->state--; st->state > st->target; st->state--)
+ cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
}
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
@@ -644,12 +637,6 @@ static void cpuhp_thread_fun(unsigned int cpu)
WARN_ON_ONCE(!cpuhp_is_ap_state(state));
- if (st->rollback) {
- struct cpuhp_step *step = cpuhp_get_step(state);
- if (step->skip_onerr)
- goto next;
- }
-
if (cpuhp_is_atomic_state(state)) {
local_irq_disable();
st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
@@ -673,7 +660,6 @@ static void cpuhp_thread_fun(unsigned int cpu)
st->should_run = false;
}
-next:
cpuhp_lock_release(bringup);
if (!st->should_run)
@@ -916,12 +902,8 @@ void cpuhp_report_idle_dead(void)
static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
- for (st->state++; st->state < st->target; st->state++) {
- struct cpuhp_step *step = cpuhp_get_step(st->state);
-
- if (!step->skip_onerr)
- cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
- }
+ for (st->state++; st->state < st->target; st->state++)
+ cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
}
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 924e37fb1620..fd6f8ed28e01 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -38,7 +38,6 @@
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
#include <linux/cpu.h>
-#include <linux/notifier.h>
#include <linux/rculist.h>
#include <linux/poll.h>
#include <linux/irq_work.h>
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5470dce212c0..977918d5d350 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -261,7 +261,7 @@ static void __touch_watchdog(void)
* entering idle state. This should only be used for scheduler events.
* Use touch_softlockup_watchdog() for everything else.
*/
-void touch_softlockup_watchdog_sched(void)
+notrace void touch_softlockup_watchdog_sched(void)
{
/*
* Preemption can be enabled. It doesn't matter which CPU's timestamp
@@ -270,7 +270,7 @@ void touch_softlockup_watchdog_sched(void)
raw_cpu_write(watchdog_touch_ts, 0);
}
-void touch_softlockup_watchdog(void)
+notrace void touch_softlockup_watchdog(void)
{
touch_softlockup_watchdog_sched();
wq_watchdog_touch(raw_smp_processor_id());
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 1f7020d65d0a..71381168dede 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -29,7 +29,7 @@ static struct cpumask dead_events_mask;
static unsigned long hardlockup_allcpu_dumped;
static atomic_t watchdog_cpus = ATOMIC_INIT(0);
-void arch_touch_nmi_watchdog(void)
+notrace void arch_touch_nmi_watchdog(void)
{
/*
* Using __raw here because some code paths have
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 60e80198c3df..0280deac392e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5574,7 +5574,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
mod_timer(&wq_watchdog_timer, jiffies + thresh);
}
-void wq_watchdog_touch(int cpu)
+notrace void wq_watchdog_touch(int cpu)
{
if (cpu >= 0)
per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index c72577e472f2..a66595ba5543 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -4,7 +4,6 @@
*/
#include <linux/percpu_counter.h>
-#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/cpu.h>
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 310e29b51507..30526afa8343 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -28,7 +28,6 @@
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>
-#include <linux/rhashtable.h>
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4U
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6551d3b0dc30..84ae9bf5858a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -27,7 +27,6 @@
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
-#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e75865d58ba7..05e983f42316 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,7 +32,6 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/oom.h>
-#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
diff --git a/mm/slub.c b/mm/slub.c
index ce2b9e5cea77..8da34a8af53d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -19,7 +19,6 @@
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
-#include <linux/notifier.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/cpu.h>
diff --git a/net/core/dev.c b/net/core/dev.c
index 325fc5088370..82114e1111e6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -93,7 +93,6 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
-#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 962c4fd338ba..1c45c1d6d241 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -767,7 +767,6 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
const struct tc_action *a;
struct dsa_port *to_dp;
int err = -EOPNOTSUPP;
- LIST_HEAD(actions);
if (!ds->ops->port_mirror_add)
return err;
@@ -775,8 +774,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
if (!tcf_exts_has_one_action(cls->exts))
return err;
- tcf_exts_to_list(cls->exts, &actions);
- a = list_first_entry(&actions, struct tc_action, list);
+ a = tcf_exts_first_action(cls->exts);
if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
struct dsa_mall_mirror_tc_entry *mirror;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 13d34427ca3d..02ff2dde9609 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -95,11 +95,10 @@ struct bbr {
u32 mode:3, /* current bbr_mode in state machine */
prev_ca_state:3, /* CA state on previous ACK */
packet_conservation:1, /* use packet conservation? */
- restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:12,
+ unused:13,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -175,6 +174,8 @@ static const u32 bbr_lt_bw_diff = 4000 / 8;
/* If we estimate we're policed, use lt_bw for this many round trips: */
static const u32 bbr_lt_bw_max_rtts = 48;
+static void bbr_check_probe_rtt_done(struct sock *sk);
+
/* Do we estimate that STARTUP filled the pipe? */
static bool bbr_full_bw_reached(const struct sock *sk)
{
@@ -309,6 +310,8 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
*/
if (bbr->mode == BBR_PROBE_BW)
bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
+ else if (bbr->mode == BBR_PROBE_RTT)
+ bbr_check_probe_rtt_done(sk);
}
}
@@ -396,17 +399,11 @@ static bool bbr_set_cwnd_to_recover_or_restore(
cwnd = tcp_packets_in_flight(tp) + acked;
} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
/* Exiting loss recovery; restore cwnd saved before recovery. */
- bbr->restore_cwnd = 1;
+ cwnd = max(cwnd, bbr->prior_cwnd);
bbr->packet_conservation = 0;
}
bbr->prev_ca_state = state;
- if (bbr->restore_cwnd) {
- /* Restore cwnd after exiting loss recovery or PROBE_RTT. */
- cwnd = max(cwnd, bbr->prior_cwnd);
- bbr->restore_cwnd = 0;
- }
-
if (bbr->packet_conservation) {
*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
return true; /* yes, using packet conservation */
@@ -423,10 +420,10 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u32 cwnd = 0, target_cwnd = 0;
+ u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
if (!acked)
- return;
+ goto done; /* no packet fully ACKed; just apply caps */
if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
goto done;
@@ -748,6 +745,20 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */
}
+static void bbr_check_probe_rtt_done(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ if (!(bbr->probe_rtt_done_stamp &&
+ after(tcp_jiffies32, bbr->probe_rtt_done_stamp)))
+ return;
+
+ bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */
+ tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
+ bbr_reset_mode(sk);
+}
+
/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
* periodically drain the bottleneck queue, to converge to measure the true
* min_rtt (unloaded propagation delay). This allows the flows to keep queues
@@ -806,12 +817,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
} else if (bbr->probe_rtt_done_stamp) {
if (bbr->round_start)
bbr->probe_rtt_round_done = 1;
- if (bbr->probe_rtt_round_done &&
- after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
- bbr->min_rtt_stamp = tcp_jiffies32;
- bbr->restore_cwnd = 1; /* snap to prior_cwnd */
- bbr_reset_mode(sk);
- }
+ if (bbr->probe_rtt_round_done)
+ bbr_check_probe_rtt_done(sk);
}
}
/* Restart after idle ends only once we process a new S/ACK for data */
@@ -862,7 +869,6 @@ static void bbr_init(struct sock *sk)
bbr->has_seen_rtt = 0;
bbr_init_pacing_rate_from_rtt(sk);
- bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
bbr->full_bw_reached = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9e041fa5c545..44c09eddbb78 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2517,6 +2517,12 @@ static int __net_init tcp_sk_init(struct net *net)
if (res)
goto fail;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+ /* Please enforce IP_DF and IPID==0 for RST and
+ * ACK sent in SYN-RECV and TIME-WAIT state.
+ */
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2fac4ad74867..d51a8c0b3372 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2398,7 +2398,7 @@ static void addrconf_add_mroute(struct net_device *dev)
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
- ip6_route_add(&cfg, GFP_ATOMIC, NULL);
+ ip6_route_add(&cfg, GFP_KERNEL, NULL);
}
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
@@ -3062,7 +3062,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
if (addr.s6_addr32[3]) {
add_addr(idev, &addr, plen, scope);
addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags,
- GFP_ATOMIC);
+ GFP_KERNEL);
return;
}
@@ -3087,7 +3087,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
add_addr(idev, &addr, plen, flag);
addrconf_prefix_route(&addr, plen, 0, idev->dev,
- 0, pflags, GFP_ATOMIC);
+ 0, pflags, GFP_KERNEL);
}
}
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d212738e9d10..c861a6d4671d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -198,6 +198,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
}
}
+ lwtstate_put(f6i->fib6_nh.nh_lwtstate);
+
if (f6i->fib6_nh.nh_dev)
dev_put(f6i->fib6_nh.nh_dev);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 38dec9da90d3..5095367c7204 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1094,7 +1094,8 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
}
t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, list);
+ if (t)
+ unregister_netdevice_queue(t->dev, list);
}
static int __net_init vti6_init_net(struct net *net)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7208c16302f6..c4ea13e8360b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -956,7 +956,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
rt->dst.error = 0;
rt->dst.output = ip6_output;
- if (ort->fib6_type == RTN_LOCAL) {
+ if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
rt->dst.input = ip6_input;
} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
rt->dst.input = ip6_mc_input;
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 82e6edf9c5d9..45f33d6dedf7 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -100,7 +100,7 @@ static int ncsi_write_package_info(struct sk_buff *skb,
bool found;
int rc;
- if (id > ndp->package_num) {
+ if (id > ndp->package_num - 1) {
netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id);
return -ENODEV;
}
@@ -240,7 +240,7 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
return 0; /* done */
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+ &ncsi_genl_family, NLM_F_MULTI, NCSI_CMD_PKG_INFO);
if (!hdr) {
rc = -EMSGSIZE;
goto err;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2c7b7c352d3e..b9bbcf3d6c63 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -37,7 +37,6 @@
#include <net/tcp.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#include <net/tcp.h>
#include <net/addrconf.h>
#include "rds.h"
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 229d63c99be2..db83dac1e7f4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -300,21 +300,17 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
}
EXPORT_SYMBOL(tcf_generic_walker);
-static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
- struct tc_action **a, int bind)
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
spin_lock(&idrinfo->lock);
p = idr_find(&idrinfo->action_idr, index);
- if (IS_ERR(p)) {
+ if (IS_ERR(p))
p = NULL;
- } else if (p) {
+ else if (p)
refcount_inc(&p->tcfa_refcnt);
- if (bind)
- atomic_inc(&p->tcfa_bindcnt);
- }
spin_unlock(&idrinfo->lock);
if (p) {
@@ -323,23 +319,10 @@ static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
}
return false;
}
-
-int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
-{
- return __tcf_idr_check(tn, index, a, 0);
-}
EXPORT_SYMBOL(tcf_idr_search);
-bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
- int bind)
+static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
{
- return __tcf_idr_check(tn, index, a, bind);
-}
-EXPORT_SYMBOL(tcf_idr_check);
-
-int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
-{
- struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
int ret = 0;
@@ -370,7 +353,6 @@ int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
spin_unlock(&idrinfo->lock);
return ret;
}
-EXPORT_SYMBOL(tcf_idr_delete_index);
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
@@ -409,7 +391,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->idrinfo = idrinfo;
p->ops = ops;
- INIT_LIST_HEAD(&p->list);
*a = p;
return 0;
err3:
@@ -686,14 +667,18 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
}
+/* Put all actions in this array, skip those NULL's. */
static void tcf_action_put_many(struct tc_action *actions[])
{
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+ for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
struct tc_action *a = actions[i];
- const struct tc_action_ops *ops = a->ops;
+ const struct tc_action_ops *ops;
+ if (!a)
+ continue;
+ ops = a->ops;
if (tcf_action_put(a))
module_put(ops->owner);
}
@@ -1175,41 +1160,38 @@ err_out:
return err;
}
-static int tcf_action_delete(struct net *net, struct tc_action *actions[],
- int *acts_deleted, struct netlink_ext_ack *extack)
+static int tcf_action_delete(struct net *net, struct tc_action *actions[])
{
- u32 act_index;
- int ret, i;
+ int i;
for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
struct tc_action *a = actions[i];
const struct tc_action_ops *ops = a->ops;
-
/* Actions can be deleted concurrently so we must save their
* type and id to search again after reference is released.
*/
- act_index = a->tcfa_index;
+ struct tcf_idrinfo *idrinfo = a->idrinfo;
+ u32 act_index = a->tcfa_index;
if (tcf_action_put(a)) {
/* last reference, action was deleted concurrently */
module_put(ops->owner);
} else {
+ int ret;
+
/* now do the delete */
- ret = ops->delete(net, act_index);
- if (ret < 0) {
- *acts_deleted = i + 1;
+ ret = tcf_idr_delete_index(idrinfo, act_index);
+ if (ret < 0)
return ret;
- }
}
+ actions[i] = NULL;
}
- *acts_deleted = i;
return 0;
}
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
- int *acts_deleted, u32 portid, size_t attr_size,
- struct netlink_ext_ack *extack)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
int ret;
struct sk_buff *skb;
@@ -1227,7 +1209,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
}
/* now do the delete */
- ret = tcf_action_delete(net, actions, acts_deleted, extack);
+ ret = tcf_action_delete(net, actions);
if (ret < 0) {
NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
@@ -1249,8 +1231,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
size_t attr_size = 0;
- struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {};
- int acts_deleted = 0;
+ struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (ret < 0)
@@ -1280,14 +1261,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
if (event == RTM_GETACTION)
ret = tcf_get_notify(net, portid, n, actions, event, extack);
else { /* delete */
- ret = tcf_del_notify(net, n, actions, &acts_deleted, portid,
- attr_size, extack);
+ ret = tcf_del_notify(net, n, actions, portid, attr_size, extack);
if (ret)
goto err;
- return ret;
+ return 0;
}
err:
- tcf_action_put_many(&actions[acts_deleted]);
+ tcf_action_put_many(actions);
return ret;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index d30b23e42436..0c68bc9cf0b4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -395,13 +395,6 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_bpf_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, bpf_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
.type = TCA_ACT_BPF,
@@ -412,7 +405,6 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.init = tcf_bpf_init,
.walk = tcf_bpf_walker,
.lookup = tcf_bpf_search,
- .delete = tcf_bpf_delete,
.size = sizeof(struct tcf_bpf),
};
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 54c0bf54f2ac..6f0f273f1139 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -198,13 +198,6 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_connmark_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, connmark_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.type = TCA_ACT_CONNMARK,
@@ -214,7 +207,6 @@ static struct tc_action_ops act_connmark_ops = {
.init = tcf_connmark_init,
.walk = tcf_connmark_walker,
.lookup = tcf_connmark_search,
- .delete = tcf_connmark_delete,
.size = sizeof(struct tcf_connmark_info),
};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e698d3fe2080..b8a67ae3105a 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -659,13 +659,6 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
return nla_total_size(sizeof(struct tc_csum));
}
-static int tcf_csum_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, csum_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
.type = TCA_ACT_CSUM,
@@ -677,7 +670,6 @@ static struct tc_action_ops act_csum_ops = {
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
.get_fill_size = tcf_csum_get_fill_size,
- .delete = tcf_csum_delete,
.size = sizeof(struct tcf_csum),
};
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 6a3f25a8ffb3..cd1d9bd32ef9 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -243,13 +243,6 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
return sz;
}
-static int tcf_gact_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, gact_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -261,7 +254,6 @@ static struct tc_action_ops act_gact_ops = {
.walk = tcf_gact_walker,
.lookup = tcf_gact_search,
.get_fill_size = tcf_gact_get_fill_size,
- .delete = tcf_gact_delete,
.size = sizeof(struct tcf_gact),
};
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index d1081bdf1bdb..196430aefe87 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -167,16 +167,16 @@ static struct tcf_meta_ops *find_ife_oplist(u16 metaid)
{
struct tcf_meta_ops *o;
- read_lock_bh(&ife_mod_lock);
+ read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
if (o->metaid == metaid) {
if (!try_module_get(o->owner))
o = NULL;
- read_unlock_bh(&ife_mod_lock);
+ read_unlock(&ife_mod_lock);
return o;
}
}
- read_unlock_bh(&ife_mod_lock);
+ read_unlock(&ife_mod_lock);
return NULL;
}
@@ -190,12 +190,12 @@ int register_ife_op(struct tcf_meta_ops *mops)
!mops->get || !mops->alloc)
return -EINVAL;
- write_lock_bh(&ife_mod_lock);
+ write_lock(&ife_mod_lock);
list_for_each_entry(m, &ifeoplist, list) {
if (m->metaid == mops->metaid ||
(strcmp(mops->name, m->name) == 0)) {
- write_unlock_bh(&ife_mod_lock);
+ write_unlock(&ife_mod_lock);
return -EEXIST;
}
}
@@ -204,7 +204,7 @@ int register_ife_op(struct tcf_meta_ops *mops)
mops->release = ife_release_meta_gen;
list_add_tail(&mops->list, &ifeoplist);
- write_unlock_bh(&ife_mod_lock);
+ write_unlock(&ife_mod_lock);
return 0;
}
EXPORT_SYMBOL_GPL(unregister_ife_op);
@@ -214,7 +214,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops)
struct tcf_meta_ops *m;
int err = -ENOENT;
- write_lock_bh(&ife_mod_lock);
+ write_lock(&ife_mod_lock);
list_for_each_entry(m, &ifeoplist, list) {
if (m->metaid == mops->metaid) {
list_del(&mops->list);
@@ -222,7 +222,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops)
break;
}
}
- write_unlock_bh(&ife_mod_lock);
+ write_unlock(&ife_mod_lock);
return err;
}
@@ -265,11 +265,8 @@ static const char *ife_meta_id2name(u32 metaid)
#endif
/* called when adding new meta information
- * under ife->tcf_lock for existing action
*/
-static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
- void *val, int len, bool exists,
- bool rtnl_held)
+static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held)
{
struct tcf_meta_ops *ops = find_ife_oplist(metaid);
int ret = 0;
@@ -277,15 +274,11 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
if (!ops) {
ret = -ENOENT;
#ifdef CONFIG_MODULES
- if (exists)
- spin_unlock_bh(&ife->tcf_lock);
if (rtnl_held)
rtnl_unlock();
request_module("ife-meta-%s", ife_meta_id2name(metaid));
if (rtnl_held)
rtnl_lock();
- if (exists)
- spin_lock_bh(&ife->tcf_lock);
ops = find_ife_oplist(metaid);
#endif
}
@@ -302,24 +295,17 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
}
/* called when adding new meta information
- * under ife->tcf_lock for existing action
*/
-static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
- int len, bool atomic)
+static int __add_metainfo(const struct tcf_meta_ops *ops,
+ struct tcf_ife_info *ife, u32 metaid, void *metaval,
+ int len, bool atomic, bool exists)
{
struct tcf_meta_info *mi = NULL;
- struct tcf_meta_ops *ops = find_ife_oplist(metaid);
int ret = 0;
- if (!ops)
- return -ENOENT;
-
mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL);
- if (!mi) {
- /*put back what find_ife_oplist took */
- module_put(ops->owner);
+ if (!mi)
return -ENOMEM;
- }
mi->metaid = metaid;
mi->ops = ops;
@@ -327,29 +313,47 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL);
if (ret != 0) {
kfree(mi);
- module_put(ops->owner);
return ret;
}
}
+ if (exists)
+ spin_lock_bh(&ife->tcf_lock);
list_add_tail(&mi->metalist, &ife->metalist);
+ if (exists)
+ spin_unlock_bh(&ife->tcf_lock);
+
+ return ret;
+}
+
+static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
+ int len, bool exists)
+{
+ const struct tcf_meta_ops *ops = find_ife_oplist(metaid);
+ int ret;
+ if (!ops)
+ return -ENOENT;
+ ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists);
+ if (ret)
+ /*put back what find_ife_oplist took */
+ module_put(ops->owner);
return ret;
}
-static int use_all_metadata(struct tcf_ife_info *ife)
+static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
{
struct tcf_meta_ops *o;
int rc = 0;
int installed = 0;
- read_lock_bh(&ife_mod_lock);
+ read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
- rc = add_metainfo(ife, o->metaid, NULL, 0, true);
+ rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists);
if (rc == 0)
installed += 1;
}
- read_unlock_bh(&ife_mod_lock);
+ read_unlock(&ife_mod_lock);
if (installed)
return 0;
@@ -422,7 +426,6 @@ static void tcf_ife_cleanup(struct tc_action *a)
kfree_rcu(p, rcu);
}
-/* under ife->tcf_lock for existing action */
static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
bool exists, bool rtnl_held)
{
@@ -436,8 +439,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
val = nla_data(tb[i]);
len = nla_len(tb[i]);
- rc = load_metaops_and_vet(ife, i, val, len, exists,
- rtnl_held);
+ rc = load_metaops_and_vet(i, val, len, rtnl_held);
if (rc != 0)
return rc;
@@ -540,8 +542,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
p->eth_type = ife_type;
}
- if (exists)
- spin_lock_bh(&ife->tcf_lock);
if (ret == ACT_P_CREATED)
INIT_LIST_HEAD(&ife->metalist);
@@ -551,10 +551,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
NULL, NULL);
if (err) {
metadata_parse_err:
- if (exists)
- spin_unlock_bh(&ife->tcf_lock);
tcf_idr_release(*a, bind);
-
kfree(p);
return err;
}
@@ -569,17 +566,16 @@ metadata_parse_err:
* as we can. You better have at least one else we are
* going to bail out
*/
- err = use_all_metadata(ife);
+ err = use_all_metadata(ife, exists);
if (err) {
- if (exists)
- spin_unlock_bh(&ife->tcf_lock);
tcf_idr_release(*a, bind);
-
kfree(p);
return err;
}
}
+ if (exists)
+ spin_lock_bh(&ife->tcf_lock);
ife->tcf_action = parm->action;
/* protected by tcf_lock when modifying existing action */
rcu_swap_protected(ife->params, p, 1);
@@ -853,13 +849,6 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_ife_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, ife_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_ife_ops = {
.kind = "ife",
.type = TCA_ACT_IFE,
@@ -870,7 +859,6 @@ static struct tc_action_ops act_ife_ops = {
.init = tcf_ife_init,
.walk = tcf_ife_walker,
.lookup = tcf_ife_search,
- .delete = tcf_ife_delete,
.size = sizeof(struct tcf_ife_info),
};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 51f235bbeb5b..23273b5303fd 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -337,13 +337,6 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_ipt_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
.type = TCA_ACT_IPT,
@@ -354,7 +347,6 @@ static struct tc_action_ops act_ipt_ops = {
.init = tcf_ipt_init,
.walk = tcf_ipt_walker,
.lookup = tcf_ipt_search,
- .delete = tcf_ipt_delete,
.size = sizeof(struct tcf_ipt),
};
@@ -395,13 +387,6 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_xt_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, xt_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
.type = TCA_ACT_XT,
@@ -412,7 +397,6 @@ static struct tc_action_ops act_xt_ops = {
.init = tcf_xt_init,
.walk = tcf_xt_walker,
.lookup = tcf_xt_search,
- .delete = tcf_xt_delete,
.size = sizeof(struct tcf_ipt),
};
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 38fd20f10f67..8bf66d0a6800 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -395,13 +395,6 @@ static void tcf_mirred_put_dev(struct net_device *dev)
dev_put(dev);
}
-static int tcf_mirred_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, mirred_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
.type = TCA_ACT_MIRRED,
@@ -416,7 +409,6 @@ static struct tc_action_ops act_mirred_ops = {
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
.put_dev = tcf_mirred_put_dev,
- .delete = tcf_mirred_delete,
};
static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 822e903bfc25..4313aa102440 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -300,13 +300,6 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_nat_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, nat_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.type = TCA_ACT_NAT,
@@ -316,7 +309,6 @@ static struct tc_action_ops act_nat_ops = {
.init = tcf_nat_init,
.walk = tcf_nat_walker,
.lookup = tcf_nat_search,
- .delete = tcf_nat_delete,
.size = sizeof(struct tcf_nat),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 8a7a7cb94e83..107034070019 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -460,13 +460,6 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_pedit_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, pedit_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
.type = TCA_ACT_PEDIT,
@@ -477,7 +470,6 @@ static struct tc_action_ops act_pedit_ops = {
.init = tcf_pedit_init,
.walk = tcf_pedit_walker,
.lookup = tcf_pedit_search,
- .delete = tcf_pedit_delete,
.size = sizeof(struct tcf_pedit),
};
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 06f0742db593..5d8bfa878477 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -320,13 +320,6 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_police_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, police_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
@@ -340,7 +333,6 @@ static struct tc_action_ops act_police_ops = {
.init = tcf_police_init,
.walk = tcf_police_walker,
.lookup = tcf_police_search,
- .delete = tcf_police_delete,
.size = sizeof(struct tcf_police),
};
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 207b4132d1b0..44e9c00657bc 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -232,13 +232,6 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_sample_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, sample_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_sample_ops = {
.kind = "sample",
.type = TCA_ACT_SAMPLE,
@@ -249,7 +242,6 @@ static struct tc_action_ops act_sample_ops = {
.cleanup = tcf_sample_cleanup,
.walk = tcf_sample_walker,
.lookup = tcf_sample_search,
- .delete = tcf_sample_delete,
.size = sizeof(struct tcf_sample),
};
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e616523ba3c1..52400d49f81f 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -196,13 +196,6 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_simp_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, simp_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
.type = TCA_ACT_SIMP,
@@ -213,7 +206,6 @@ static struct tc_action_ops act_simp_ops = {
.init = tcf_simp_init,
.walk = tcf_simp_walker,
.lookup = tcf_simp_search,
- .delete = tcf_simp_delete,
.size = sizeof(struct tcf_defact),
};
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 926d7bc4a89d..73e44ce2a883 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -299,13 +299,6 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_skbedit_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, skbedit_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
.type = TCA_ACT_SKBEDIT,
@@ -316,7 +309,6 @@ static struct tc_action_ops act_skbedit_ops = {
.cleanup = tcf_skbedit_cleanup,
.walk = tcf_skbedit_walker,
.lookup = tcf_skbedit_search,
- .delete = tcf_skbedit_delete,
.size = sizeof(struct tcf_skbedit),
};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index d6a1af0c4171..588077fafd6c 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -259,13 +259,6 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_skbmod_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, skbmod_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_skbmod_ops = {
.kind = "skbmod",
.type = TCA_ACT_SKBMOD,
@@ -276,7 +269,6 @@ static struct tc_action_ops act_skbmod_ops = {
.cleanup = tcf_skbmod_cleanup,
.walk = tcf_skbmod_walker,
.lookup = tcf_skbmod_search,
- .delete = tcf_skbmod_delete,
.size = sizeof(struct tcf_skbmod),
};
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 8f09cf08d8fe..420759153d5f 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -548,13 +548,6 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tunnel_key_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
.type = TCA_ACT_TUNNEL_KEY,
@@ -565,7 +558,6 @@ static struct tc_action_ops act_tunnel_key_ops = {
.cleanup = tunnel_key_release,
.walk = tunnel_key_walker,
.lookup = tunnel_key_search,
- .delete = tunnel_key_delete,
.size = sizeof(struct tcf_tunnel_key),
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 209e70ad2c09..033d273afe50 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -296,13 +296,6 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
-static int tcf_vlan_delete(struct net *net, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, vlan_net_id);
-
- return tcf_idr_delete_index(tn, index);
-}
-
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
.type = TCA_ACT_VLAN,
@@ -313,7 +306,6 @@ static struct tc_action_ops act_vlan_ops = {
.cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
.lookup = tcf_vlan_search,
- .delete = tcf_vlan_delete,
.size = sizeof(struct tcf_vlan),
};
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d5d2a6dc3921..f218ccf1e2d9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -914,6 +914,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_U32_MAX + 1];
u32 htid, flags = 0;
+ size_t sel_size;
int err;
#ifdef CONFIG_CLS_U32_PERF
size_t size;
@@ -1076,8 +1077,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
s = nla_data(tb[TCA_U32_SEL]);
+ sel_size = struct_size(s, keys, s->nkeys);
+ if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
+ err = -EINVAL;
+ goto erridr;
+ }
- n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
+ n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL);
if (n == NULL) {
err = -ENOBUFS;
goto erridr;
@@ -1092,7 +1098,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+ memcpy(&n->sel, s, sel_size);
RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 35fc7252187c..c07c30b916d5 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -64,7 +64,6 @@
#include <linux/vmalloc.h>
#include <linux/reciprocal_div.h>
#include <net/netlink.h>
-#include <linux/version.h>
#include <linux/if_vlan.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -621,15 +620,20 @@ static bool cake_ddst(int flow_mode)
}
static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
- int flow_mode)
+ int flow_mode, u16 flow_override, u16 host_override)
{
- u32 flow_hash = 0, srchost_hash, dsthost_hash;
+ u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
u16 reduced_hash, srchost_idx, dsthost_idx;
struct flow_keys keys, host_keys;
if (unlikely(flow_mode == CAKE_FLOW_NONE))
return 0;
+ /* If both overrides are set we can skip packet dissection entirely */
+ if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
+ (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
+ goto skip_hash;
+
skb_flow_dissect_flow_keys(skb, &keys,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
@@ -676,6 +680,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
if (flow_mode & CAKE_FLOW_FLOWS)
flow_hash = flow_hash_from_keys(&keys);
+skip_hash:
+ if (flow_override)
+ flow_hash = flow_override - 1;
+ if (host_override) {
+ dsthost_hash = host_override - 1;
+ srchost_hash = host_override - 1;
+ }
+
if (!(flow_mode & CAKE_FLOW_FLOWS)) {
if (flow_mode & CAKE_FLOW_SRC_IP)
flow_hash ^= srchost_hash;
@@ -1571,7 +1583,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
struct cake_sched_data *q = qdisc_priv(sch);
struct tcf_proto *filter;
struct tcf_result res;
- u32 flow = 0;
+ u16 flow = 0, host = 0;
int result;
filter = rcu_dereference_bh(q->filter_list);
@@ -1595,10 +1607,12 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
#endif
if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
flow = TC_H_MIN(res.classid);
+ if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16))
+ host = TC_H_MAJ(res.classid) >> 16;
}
hash:
*t = cake_select_tin(sch, skb);
- return flow ?: cake_hash(*t, skb, flow_mode) + 1;
+ return cake_hash(*t, skb, flow_mode, flow, host) + 1;
}
static void cake_reconfigure(struct Qdisc *sch);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 93c0c225ab34..180b6640e531 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -213,9 +213,14 @@ static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
- /* We are already sending pages, ignore notification */
- if (ctx->in_tcp_sendpages)
+ /* If in_tcp_sendpages call lower protocol write space handler
+ * to ensure we wake up any waiting operations there. For example
+ * if do_tcp_sendpages where to call sk_wait_event.
+ */
+ if (ctx->in_tcp_sendpages) {
+ ctx->sk_write_space(sk);
return;
+ }
if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
gfp_t sk_allocation = sk->sk_allocation;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 911ca6d3cb5a..bfe2dbea480b 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -74,14 +74,14 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
return 0;
if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
- return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
+ return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
bpf.command = XDP_QUERY_XSK_UMEM;
rtnl_lock();
err = xdp_umem_query(dev, queue_id);
if (err) {
- err = err < 0 ? -ENOTSUPP : -EBUSY;
+ err = err < 0 ? -EOPNOTSUPP : -EBUSY;
goto err_rtnl_unlock;
}
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index c75413d05a63..ce53639a864a 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -153,10 +153,6 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \
# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
-# cc-if-fullversion
-# Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1)
-cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4))
-
# cc-ldoption
# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
cc-ldoption = $(call try-run,\
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 1c48572223d1..5a2d1c9578a0 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -246,8 +246,6 @@ objtool_args += --no-fp
endif
ifdef CONFIG_GCOV_KERNEL
objtool_args += --no-unreachable
-else
-objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
endif
ifdef CONFIG_RETPOLINE
ifneq ($(RETPOLINE_CFLAGS),)
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 1832100d1b27..6d41323be291 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -194,8 +194,10 @@ int do_event_pipe(int argc, char **argv)
}
while (argc) {
- if (argc < 2)
+ if (argc < 2) {
BAD_ARG();
+ goto err_close_map;
+ }
if (is_prefix(*argv, "cpu")) {
char *endptr;
@@ -221,6 +223,7 @@ int do_event_pipe(int argc, char **argv)
NEXT_ARG();
} else {
BAD_ARG();
+ goto err_close_map;
}
do_all = false;
OpenPOWER on IntegriCloud