From 484deaedfa03fa0d10e0b1ad3facc910ccc951a8 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 26 Jul 2019 15:57:50 -0500 Subject: drm/amdgpu: Extends amdgpu vm definitions (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RW mtype introduced for arcturus. v2: * Don't add probe-invalidation bit from UAPI * Don't add unused AMDGPU_MTYPE_ definitions Signed-off-by: Oak Zeng Signed-off-by: Felix Kuehling Reviewed-by: Christian König Reviewed-by: Shaoyun Liu Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c99b4f2482c6..640e5356fd65 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -500,6 +500,8 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_MTYPE_CC (3 << 5) /* Use UC MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_UC (4 << 5) +/* Use RW MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_RW (5 << 5) struct drm_amdgpu_gem_va { /** GEM object handle */ -- cgit v1.2.3 From 455d56ce809fcc540dc029a05db074855269dc33 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 19 Sep 2019 09:10:16 +0200 Subject: drm/v3d: clean caches at the end of render jobs on request from user space Extends the user space ioctl for CL submissions so it can include a request to flush the cache once the CL execution has completed. Fixes memory write violation messages reported by the kernel in workloads involving shader memory writes (SSBOs, shader images, scratch, etc) which sometimes also lead to GPU resets during Piglit and CTS workloads. v2: if v3d_job_init() fails we need to kfree() the job instead of v3d_job_put() it (Eric Anholt). v3 (Eric Anholt): - Drop _FLAG suffix from the new flag name. - Add a new param so userspace can tell whether cache flushing is implemented in the kernel. Signed-off-by: Iago Toral Quiroga Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190919071016.4578-1-itoral@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.c | 3 +++ drivers/gpu/drm/v3d/v3d_gem.c | 54 ++++++++++++++++++++++++++++++++++++------- include/uapi/drm/v3d_drm.h | 8 ++++--- 3 files changed, 54 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 3506ae2723ae..e94bf75368be 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -126,6 +126,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_CSD: args->value = v3d_has_csd(v3d); return 0; + case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH: + args->value = 1; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index fb32cda18ffe..4c4b59ae2c81 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_v3d_submit_cl *args = data; struct v3d_bin_job *bin = NULL; struct v3d_render_job *render; + struct v3d_job *clean_job = NULL; + struct v3d_job *last_job; struct ww_acquire_ctx acquire_ctx; int ret = 0; trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); - if (args->pad != 0) { - DRM_INFO("pad must be zero: %d\n", args->pad); + if (args->flags != 0 && + args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { + DRM_INFO("invalid flags: %d\n", args->flags); return -EINVAL; } @@ -576,12 +579,31 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, bin->render = render; } - ret = v3d_lookup_bos(dev, file_priv, &render->base, + if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { + clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL); + if (!clean_job) { + ret = -ENOMEM; + goto fail; + } + + ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0); + if (ret) { + kfree(clean_job); + clean_job = NULL; + goto fail; + } + + last_job = clean_job; + } else { + last_job = &render->base; + } + + ret = v3d_lookup_bos(dev, file_priv, last_job, args->bo_handles, args->bo_handle_count); if (ret) goto fail; - ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx); + ret = v3d_lock_bo_reservations(last_job, &acquire_ctx); if (ret) goto fail; @@ -600,28 +622,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); if (ret) goto fail_unreserve; + + if (clean_job) { + struct dma_fence *render_fence = + dma_fence_get(render->base.done_fence); + ret = drm_gem_fence_array_add(&clean_job->deps, render_fence); + if (ret) + goto fail_unreserve; + ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN); + if (ret) + goto fail_unreserve; + } + mutex_unlock(&v3d->sched_lock); v3d_attach_fences_and_unlock_reservation(file_priv, - &render->base, + last_job, &acquire_ctx, args->out_sync, - render->base.done_fence); + last_job->done_fence); if (bin) v3d_job_put(&bin->base); v3d_job_put(&render->base); + if (clean_job) + v3d_job_put(clean_job); return 0; fail_unreserve: mutex_unlock(&v3d->sched_lock); - drm_gem_unlock_reservations(render->base.bo, - render->base.bo_count, &acquire_ctx); + drm_gem_unlock_reservations(last_job->bo, + last_job->bo_count, &acquire_ctx); fail: if (bin) v3d_job_put(&bin->base); v3d_job_put(&render->base); + if (clean_job) + v3d_job_put(clean_job); return ret; } diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 58fbe48c91e9..1ce746e228d9 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -48,6 +48,8 @@ extern "C" { #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd) +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 + /** * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D * engine. @@ -61,7 +63,7 @@ extern "C" { * flushed by the time the render done IRQ happens, which is the * trigger for out_sync. Any dirtying of cachelines by the job (only * possible using TMU writes) must be flushed by the caller using the - * CL's cache flush commands. + * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag. */ struct drm_v3d_submit_cl { /* Pointer to the binner command list. @@ -124,8 +126,7 @@ struct drm_v3d_submit_cl { /* Number of BO handles passed in (size is that times 4). */ __u32 bo_handle_count; - /* Pad, must be zero-filled. */ - __u32 pad; + __u32 flags; }; /** @@ -193,6 +194,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_V3D_CORE0_IDENT2, DRM_V3D_PARAM_SUPPORTS_TFU, DRM_V3D_PARAM_SUPPORTS_CSD, + DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, }; struct drm_v3d_get_param { -- cgit v1.2.3 From 601734f7aabd46d3a988554e59908d9de7f8b013 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 13 Sep 2019 08:51:37 +0100 Subject: drm/i915/tgl: s/ss/eu fuse reading support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen12 has dual-subslices (DSS), which compared to gen11 subslices have some duplicated resources/paths. Although DSS behave similarly to 2 subslices, instead of splitting this and presenting userspace with bits not directly representative of hardware resources, present userspace with a subslice_mask made up of DSS bits instead. v2: GEM_BUG_ON on mask size (Lionel) Bspec: 29547 Bspec: 12247 Cc: Kelvin Gardiner Cc: Tvrtko Ursulin Cc: Lionel Landwerlin CC: Radhakrishna Sripada Cc: Michel Thierry #v1 Cc: Daniele Ceraolo Spurio Cc: José Roberto de Souza Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: James Ausmus Signed-off-by: Oscar Mateo Signed-off-by: Sudeep Dutt Signed-off-by: Stuart Summers Signed-off-by: Mika Kuoppala Acked-by: Lionel Landwerlin Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190913075137.18476-2-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +--- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++++++++-------- include/uapi/drm/i915_drm.h | 6 ++- 5 files changed, 72 insertions(+), 31 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4070f6ff1db6..d1d225204f09 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -18,12 +18,13 @@ struct drm_i915_private; #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) -#define GEN_MAX_EUS (10) /* HSW upper bound */ +#define GEN_MAX_EUS (16) /* TGL upper bound */ #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -40,12 +41,6 @@ struct sseu_dev_info { u8 ss_stride; u8 eu_stride; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; /* diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 43db50095257..b5b449a88cf1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + if (info->sseu.has_subslice_pg && + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index acbda6d89a9b..7c64768976ac 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 728c881718a2..85e480bdc673 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, + u8 s_en, u32 ss_en, u16 eu_en) +{ + int s, ss; + + /* ss_en represents entire subslice mask across all slices */ + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > + sizeof(ss_en) * BITS_PER_BYTE); + + for (s = 0; s < sseu->max_slices; s++) { + if ((s_en & BIT(s)) == 0) + continue; + + sseu->slice_mask |= BIT(s); + + intel_sseu_set_subslices(sseu, s, ss_en); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + u8 s_en; + u32 dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; + int eu; + + /* + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. + * Instead of splitting these, provide userspace with an array + * of DSS to more closely represent the hardware resource. + */ + intel_sseu_set_info(sseu, 1, 6, 16); + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); + + /* one bit per pair of EUs */ + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + + /* TGL only supports slice-level power gating */ + sseu->has_slice_pg = 1; +} + static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u8 s_en; - u32 ss_en, ss_en_mask; + u32 ss_en; u8 eu_en; - int s; if (IS_ELKHARTLAKE(dev_priv)) intel_sseu_set_info(sseu, 1, 4, 8); @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); - ss_en_mask = BIT(sseu->max_subslices) - 1; eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - for (s = 0; s < sseu->max_slices; s++) { - if (s_en & BIT(s)) { - int ss_idx = sseu->max_subslices * s; - int ss; - - sseu->slice_mask |= BIT(s); - - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & - ss_en_mask); - - for (ss = 0; ss < sseu->max_subslices; ss++) - if (intel_sseu_has_subslice(sseu, s, ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } - } - sseu->eu_per_subslice = hweight8(eu_en); - sseu->eu_total = compute_eu_total(sseu); + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -955,8 +994,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) gen9_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) + else if (IS_GEN(dev_priv, 11)) gen11_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 12) + gen12_sseu_info_init(dev_priv); if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { DRM_INFO("Disabling ppGTT for VT-d support\n"); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 469dc512cca3..30c542144016 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2033,8 +2033,10 @@ struct drm_i915_query { * (data[X / 8] >> (X % 8)) & 1 * * - the subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. The availability of subslice Y in slice - * X can be queried with the following formula : + * whether a subslice is available. Gen12 has dual-subslices, which are + * similar to two gen11 subslices. For gen12, this array represents dual- + * subslices. The availability of subslice Y in slice X can be queried + * with the following formula : * * (data[subslice_offset + * X * subslice_stride + -- cgit v1.2.3 From cf21e76a6005016474623a375ae2e90364a02168 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 24 Sep 2019 17:53:25 -0400 Subject: drm/amdgpu: return tcc_disabled_mask to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UMDs need this for correct programming of harvested chips. Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 12 ++++++++++++ include/uapi/drm/amdgpu_drm.h | 2 ++ 5 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9f2f54e5e06d..2ef72309e5ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -82,9 +82,10 @@ * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 34 +#define KMS_DRIVER_MINOR 35 #define KMS_DRIVER_PATCHLEVEL 0 #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index c5179a807a04..35eff9e6ce16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -165,6 +165,7 @@ struct amdgpu_gfx_config { uint32_t num_sc_per_sh; uint32_t num_packer_per_sc; uint32_t pa_sc_tile_steering_override; + uint64_t tcc_disabled_mask; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 7f4e000a400f..6746967f5e80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -782,6 +782,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.pa_sc_tile_steering_override = adev->gfx.config.pa_sc_tile_steering_override; + dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index cfc0952f6175..0fcea94f0a4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1691,6 +1691,17 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) +{ + /* TCCs are global (not instanced). */ + uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + + adev->gfx.config.tcc_disabled_mask = + REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | + (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); +} + static void gfx_v10_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1702,6 +1713,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v10_0_get_tcc_info(adev); adev->gfx.config.pa_sc_tile_steering_override = gfx_v10_0_init_pa_sc_tile_steering_override(adev); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 640e5356fd65..bbdad866e3fe 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1005,6 +1005,8 @@ struct drm_amdgpu_info_device { __u64 high_va_max; /* gfx10 pa_sc_tile_steering_override */ __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From ba2a1c8706151ac3234d2d020873feab498ab1bb Mon Sep 17 00:00:00 2001 From: Raymond Smith Date: Fri, 4 Oct 2019 14:12:38 +0000 Subject: drm/fourcc: Add Arm 16x16 block modifier Add the DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED modifier to denote the 16x16 block u-interleaved format used in Arm Utgard and Midgard GPUs. Changes from v1:- 1. Reserved the upper four bits (out of the 56 bits assigned to each vendor) to denote the category of Arm specific modifiers. Currently, we have two categories ie AFBC and MISC. Changes from v2:- 1. Preserved Ray's authorship 2. Cleanups/changes suggested by Brian 3. Added r-bs of Brian and Qiang Signed-off-by: Raymond Smith Reviewed-by: Brian Starkey Reviewed-by: Qiang Yu Signed-off-by: Ayan kumar halder Link: https://patchwork.freedesktop.org/patch/msgid/20191004141222.22337-1-ayan.halder@arm.com --- include/uapi/drm/drm_fourcc.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 3feeaa3f987a..2376d36ea573 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -648,7 +648,21 @@ extern "C" { * Further information on the use of AFBC modifiers can be found in * Documentation/gpu/afbc.rst */ -#define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) fourcc_mod_code(ARM, __afbc_mode) + +/* + * The top 4 bits (out of the 56 bits alloted for specifying vendor specific + * modifiers) denote the category for modifiers. Currently we have only two + * categories of modifiers ie AFBC and MISC. We can have a maximum of sixteen + * different categories. + */ +#define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ + fourcc_mod_code(ARM, ((__u64)(__type) << 52) | ((__val) & 0x000fffffffffffffULL)) + +#define DRM_FORMAT_MOD_ARM_TYPE_AFBC 0x00 +#define DRM_FORMAT_MOD_ARM_TYPE_MISC 0x01 + +#define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFBC, __afbc_mode) /* * AFBC superblock size @@ -742,6 +756,16 @@ extern "C" { */ #define AFBC_FORMAT_MOD_BCH (1ULL << 11) +/* + * Arm 16x16 Block U-Interleaved modifier + * + * This is used by Arm Mali Utgard and Midgard GPUs. It divides the image + * into 16x16 pixel blocks. Blocks are stored linearly in order, but pixels + * in the block are reordered. + */ +#define DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_MISC, 1ULL) + /* * Allwinner tiled modifier * -- cgit v1.2.3 From 9b7117e245bcf37b6e9f87f61461168e09f25316 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 10 Oct 2019 13:59:56 +0200 Subject: drm/omap: cleanup OMAP_BO flags Reorder OMAP_BO flags and improve the comments. Signed-off-by: Tomi Valkeinen Reviewed-by: Jean-Jacques Hiblot Link: https://patchwork.freedesktop.org/patch/msgid/20191010120000.1421-5-jjhiblot@ti.com --- include/uapi/drm/omap_drm.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 1fccffef9e27..d8ee2f840697 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -38,19 +38,20 @@ struct drm_omap_param { __u64 value; /* in (set_param), out (get_param) */ }; -#define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */ -#define OMAP_BO_CACHE_MASK 0x00000006 /* cache type mask, see cache modes */ -#define OMAP_BO_TILED_MASK 0x00000f00 /* tiled mapping mask, see tiled modes */ +/* Scanout buffer, consumable by DSS */ +#define OMAP_BO_SCANOUT 0x00000001 -/* cache modes */ -#define OMAP_BO_CACHED 0x00000000 /* default */ -#define OMAP_BO_WC 0x00000002 /* write-combine */ -#define OMAP_BO_UNCACHED 0x00000004 /* strongly-ordered (uncached) */ +/* Buffer CPU caching mode: cached, write-combining or uncached. */ +#define OMAP_BO_CACHED 0x00000000 +#define OMAP_BO_WC 0x00000002 +#define OMAP_BO_UNCACHED 0x00000004 +#define OMAP_BO_CACHE_MASK 0x00000006 -/* tiled modes */ +/* Use TILER for the buffer. The TILER container unit can be 8, 16 or 32 bits. */ #define OMAP_BO_TILED_8 0x00000100 #define OMAP_BO_TILED_16 0x00000200 #define OMAP_BO_TILED_32 0x00000300 +#define OMAP_BO_TILED_MASK 0x00000f00 #define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) union omap_gem_size { -- cgit v1.2.3 From 48b34ac041756c2fd3a898d6e96be97416858b45 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 10 Oct 2019 13:59:57 +0200 Subject: drm/omap: remove OMAP_BO_TILED define OMAP_BO_TILED does not make sense, as OMAP_BO_TILED_* values are not bitmasks but normal values. As we already have OMAP_BO_TILED_MASK for the mask, we can remove OMAP_BO_TILED and use OMAP_BO_TILED_MASK instead. Signed-off-by: Tomi Valkeinen Reviewed-by: Jean-Jacques Hiblot Link: https://patchwork.freedesktop.org/patch/msgid/20191010120000.1421-6-jjhiblot@ti.com --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.h | 2 +- drivers/gpu/drm/omapdrm/omap_fb.c | 6 +++--- drivers/gpu/drm/omapdrm/omap_gem.c | 18 +++++++++--------- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 2 +- include/uapi/drm/omap_drm.h | 1 - 5 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h index 835e6654fa82..43c1d096b021 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h @@ -113,7 +113,7 @@ extern struct platform_driver omap_dmm_driver; /* GEM bo flags -> tiler fmt */ static inline enum tiler_fmt gem2fmt(u32 flags) { - switch (flags & OMAP_BO_TILED) { + switch (flags & OMAP_BO_TILED_MASK) { case OMAP_BO_TILED_8: return TILFMT_8BIT; case OMAP_BO_TILED_16: diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 1b8b5108caf8..7403316088b8 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -95,7 +95,7 @@ static u32 get_linear_addr(struct drm_framebuffer *fb, bool omap_framebuffer_supports_rotation(struct drm_framebuffer *fb) { - return omap_gem_flags(fb->obj[0]) & OMAP_BO_TILED; + return omap_gem_flags(fb->obj[0]) & OMAP_BO_TILED_MASK; } /* Note: DRM rotates counter-clockwise, TILER & DSS rotates clockwise */ @@ -154,7 +154,7 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, x = state->src_x >> 16; y = state->src_y >> 16; - if (omap_gem_flags(fb->obj[0]) & OMAP_BO_TILED) { + if (omap_gem_flags(fb->obj[0]) & OMAP_BO_TILED_MASK) { u32 w = state->src_w >> 16; u32 h = state->src_h >> 16; @@ -212,7 +212,7 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, plane = &omap_fb->planes[1]; if (info->rotation_type == OMAP_DSS_ROT_TILER) { - WARN_ON(!(omap_gem_flags(fb->obj[1]) & OMAP_BO_TILED)); + WARN_ON(!(omap_gem_flags(fb->obj[1]) & OMAP_BO_TILED_MASK)); omap_gem_rotated_dma_addr(fb->obj[1], orient, x/2, y/2, &info->p_uv_addr); } else { diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index a6562d23d314..4e8fcfdff3a0 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -196,7 +196,7 @@ static void omap_gem_evict(struct drm_gem_object *obj) struct omap_gem_object *omap_obj = to_omap_bo(obj); struct omap_drm_private *priv = obj->dev->dev_private; - if (omap_obj->flags & OMAP_BO_TILED) { + if (omap_obj->flags & OMAP_BO_TILED_MASK) { enum tiler_fmt fmt = gem2fmt(omap_obj->flags); int i; @@ -324,7 +324,7 @@ size_t omap_gem_mmap_size(struct drm_gem_object *obj) struct omap_gem_object *omap_obj = to_omap_bo(obj); size_t size = obj->size; - if (omap_obj->flags & OMAP_BO_TILED) { + if (omap_obj->flags & OMAP_BO_TILED_MASK) { /* for tiled buffers, the virtual size has stride rounded up * to 4kb.. (to hide the fact that row n+1 might start 16kb or * 32kb later!). But we don't back the entire buffer with @@ -513,7 +513,7 @@ vm_fault_t omap_gem_fault(struct vm_fault *vmf) * probably trigger put_pages()? */ - if (omap_obj->flags & OMAP_BO_TILED) + if (omap_obj->flags & OMAP_BO_TILED_MASK) ret = omap_gem_fault_2d(obj, vma, vmf); else ret = omap_gem_fault_1d(obj, vma, vmf); @@ -786,7 +786,7 @@ int omap_gem_pin(struct drm_gem_object *obj, dma_addr_t *dma_addr) if (ret) goto fail; - if (omap_obj->flags & OMAP_BO_TILED) { + if (omap_obj->flags & OMAP_BO_TILED_MASK) { block = tiler_reserve_2d(fmt, omap_obj->width, omap_obj->height, 0); @@ -892,7 +892,7 @@ int omap_gem_rotated_dma_addr(struct drm_gem_object *obj, u32 orient, mutex_lock(&omap_obj->lock); if ((refcount_read(&omap_obj->dma_addr_cnt) > 0) && omap_obj->block && - (omap_obj->flags & OMAP_BO_TILED)) { + (omap_obj->flags & OMAP_BO_TILED_MASK)) { *dma_addr = tiler_tsptr(omap_obj->block, orient, x, y); ret = 0; } @@ -907,7 +907,7 @@ int omap_gem_tiled_stride(struct drm_gem_object *obj, u32 orient) { struct omap_gem_object *omap_obj = to_omap_bo(obj); int ret = -EINVAL; - if (omap_obj->flags & OMAP_BO_TILED) + if (omap_obj->flags & OMAP_BO_TILED_MASK) ret = tiler_stride(gem2fmt(omap_obj->flags), orient); return ret; } @@ -1046,7 +1046,7 @@ void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m) refcount_read(&omap_obj->dma_addr_cnt), omap_obj->vaddr, omap_obj->roll); - if (omap_obj->flags & OMAP_BO_TILED) { + if (omap_obj->flags & OMAP_BO_TILED_MASK) { seq_printf(m, " %dx%d", omap_obj->width, omap_obj->height); if (omap_obj->block) { struct tcm_area *area = &omap_obj->block->area; @@ -1145,7 +1145,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, int ret; /* Validate the flags and compute the memory and cache flags. */ - if (flags & OMAP_BO_TILED) { + if (flags & OMAP_BO_TILED_MASK) { if (!priv->usergart) { dev_err(dev->dev, "Tiled buffers require DMM\n"); return NULL; @@ -1187,7 +1187,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, omap_obj->flags = flags; mutex_init(&omap_obj->lock); - if (flags & OMAP_BO_TILED) { + if (flags & OMAP_BO_TILED_MASK) { /* * For tiled buffers align dimensions to slot boundaries and * calculate size based on aligned dimensions. diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index e8c3ae7ac77e..7344bb61936c 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -67,7 +67,7 @@ static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer, { struct drm_gem_object *obj = buffer->priv; struct page **pages; - if (omap_gem_flags(obj) & OMAP_BO_TILED) { + if (omap_gem_flags(obj) & OMAP_BO_TILED_MASK) { /* TODO we would need to pin at least part of the buffer to * get de-tiled view. For now just reject it. */ diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index d8ee2f840697..5a142fad473c 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -52,7 +52,6 @@ struct drm_omap_param { #define OMAP_BO_TILED_16 0x00000200 #define OMAP_BO_TILED_32 0x00000300 #define OMAP_BO_TILED_MASK 0x00000f00 -#define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) union omap_gem_size { __u32 bytes; /* (for non-tiled formats) */ -- cgit v1.2.3 From 23b482252836ab3c5e6b3b20ed3038449cbc7679 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 10 Oct 2019 14:00:00 +0200 Subject: drm/omap: add OMAP_BO flags to affect buffer allocation On SoCs with DMM/TILER, we have two ways to allocate buffers: normal dma_alloc or via DMM (which basically functions as an IOMMU). DMM can map 128MB at a time, and we only map the DMM buffers when they are used (i.e. not at alloc time). If DMM is present, omapdrm always uses DMM. There are use cases that require lots of big buffers that are being used at the same time by different IPs. At the moment the userspace has a hard maximum of 128MB. This patch adds three new flags that can be used by the userspace to solve the situation: OMAP_BO_MEM_CONTIG: The driver will use dma_alloc to get the memory. This can be used to avoid DMM if the userspace knows it needs more than 128M of memory at the same time. OMAP_BO_MEM_DMM: The driver will use DMM to get the memory. There's not much use for this flag at the moment, as on platforms with DMM it is used by default, but it's here for completeness. OMAP_BO_MEM_PIN: The driver will pin the memory at alloc time, and keep it pinned. This can be used to 1) get an error at alloc time if DMM space is full, and 2) get rid of the constant pin/unpin operations which may have some effect on performance. If none of the flags are given, the behavior is the same as currently. Signed-off-by: Tomi Valkeinen Reviewed-by: Jean-Jacques Hiblot Link: https://patchwork.freedesktop.org/patch/msgid/20191010120000.1421-9-jjhiblot@ti.com --- drivers/gpu/drm/omapdrm/omap_gem.c | 54 ++++++++++++++++++++++++++++++++++++-- include/uapi/drm/omap_drm.h | 9 +++++++ 2 files changed, 61 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index e518d93ca6df..bf18dfe2b689 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1097,6 +1097,9 @@ void omap_gem_free_object(struct drm_gem_object *obj) list_del(&omap_obj->mm_list); mutex_unlock(&priv->list_lock); + if (omap_obj->flags & OMAP_BO_MEM_PIN) + omap_gem_unpin_locked(obj); + /* * We own the sole reference to the object at this point, but to keep * lockdep happy, we must still take the omap_obj_lock to call @@ -1147,10 +1150,19 @@ static bool omap_gem_validate_flags(struct drm_device *dev, u32 flags) return false; } + if ((flags & OMAP_BO_MEM_CONTIG) && (flags & OMAP_BO_MEM_DMM)) + return false; + + if ((flags & OMAP_BO_MEM_DMM) && !priv->usergart) + return false; + if (flags & OMAP_BO_TILED_MASK) { if (!priv->usergart) return false; + if (flags & OMAP_BO_MEM_CONTIG) + return false; + switch (flags & OMAP_BO_TILED_MASK) { case OMAP_BO_TILED_8: case OMAP_BO_TILED_16: @@ -1165,7 +1177,34 @@ static bool omap_gem_validate_flags(struct drm_device *dev, u32 flags) return true; } -/* GEM buffer object constructor */ +/** + * omap_gem_new() - Create a new GEM buffer + * @dev: The DRM device + * @gsize: The requested size for the GEM buffer. If the buffer is tiled + * (2D buffer), the size is a pair of values: height and width + * expressed in pixels. If the buffers is not tiled, it is expressed + * in bytes. + * @flags: Flags give additionnal information about the allocation: + * OMAP_BO_TILED_x: use the TILER (2D buffers). The TILER container + * unit can be 8, 16 or 32 bits. Cache is always disabled for + * tiled buffers. + * OMAP_BO_SCANOUT: Scannout buffer, consummable by the DSS + * OMAP_BO_CACHED: Buffer CPU caching mode: cached + * OMAP_BO_WC: Buffer CPU caching mode: write-combined + * OMAP_BO_UNCACHED: Buffer CPU caching mode: uncached + * OMAP_BO_MEM_CONTIG: The driver will use dma_alloc to get the memory. + * This can be used to avoid DMM if the userspace knows it needs + * more than 128M of memory at the same time. + * OMAP_BO_MEM_DMM: The driver will use DMM to get the memory. There's + * not much use for this flag at the moment, as on platforms with + * DMM it is used by default, but it's here for completeness. + * OMAP_BO_MEM_PIN: The driver will pin the memory at alloc time, and + * keep it pinned. This can be used to 1) get an error at alloc + * time if DMM space is full, and 2) get rid of the constant + * pin/unpin operations which may have some effect on performance. + * + * Return: The GEM buffer or NULL if the allocation failed + */ struct drm_gem_object *omap_gem_new(struct drm_device *dev, union omap_gem_size gsize, u32 flags) { @@ -1193,7 +1232,8 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, */ flags &= ~(OMAP_BO_CACHED|OMAP_BO_WC|OMAP_BO_UNCACHED); flags |= tiler_get_cpu_cache_flags(); - } else if ((flags & OMAP_BO_SCANOUT) && !priv->has_dmm) { + } else if ((flags & OMAP_BO_MEM_CONTIG) || + ((flags & OMAP_BO_SCANOUT) && !priv->has_dmm)) { /* * If we don't have DMM, we must allocate scanout buffers * from contiguous DMA memory. @@ -1253,12 +1293,22 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, goto err_release; } + if (flags & OMAP_BO_MEM_PIN) { + ret = omap_gem_pin(obj, NULL); + if (ret) + goto err_free_dma; + } + mutex_lock(&priv->list_lock); list_add(&omap_obj->mm_list, &priv->obj_list); mutex_unlock(&priv->list_lock); return obj; +err_free_dma: + if (flags & OMAP_BO_MEM_DMA_API) + dma_free_wc(dev->dev, size, omap_obj->vaddr, + omap_obj->dma_addr); err_release: drm_gem_object_release(obj); err_free: diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 5a142fad473c..842d3180a442 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -47,6 +47,15 @@ struct drm_omap_param { #define OMAP_BO_UNCACHED 0x00000004 #define OMAP_BO_CACHE_MASK 0x00000006 +/* Force allocation from contiguous DMA memory */ +#define OMAP_BO_MEM_CONTIG 0x00000008 + +/* Force allocation via DMM */ +#define OMAP_BO_MEM_DMM 0x00000010 + +/* Pin the buffer when allocating and keep pinned */ +#define OMAP_BO_MEM_PIN 0x00000020 + /* Use TILER for the buffer. The TILER container unit can be 8, 16 or 32 bits. */ #define OMAP_BO_TILED_8 0x00000100 #define OMAP_BO_TILED_16 0x00000200 -- cgit v1.2.3 From b8d49f28aa03e4678e450e588b10c0faf96e4118 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 14 Oct 2019 21:14:01 +0100 Subject: drm/i915/perf: introduce a versioning of the i915-perf uapi Reporting this version will help application figure out what level of the support the running kernel provides. v2: Add i915_perf_ioctl_version() (Chris) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_getparam.c | 4 ++++ drivers/gpu/drm/i915/i915_perf.c | 10 ++++++++++ drivers/gpu/drm/i915/i915_perf.h | 1 + include/uapi/drm/i915_drm.h | 21 +++++++++++++++++++++ 4 files changed, 36 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index f4b3cbb1adce..ad33fbe90a28 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -5,6 +5,7 @@ #include "gt/intel_engine_user.h" #include "i915_drv.h" +#include "i915_perf.h" int i915_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; break; + case I915_PARAM_PERF_REVISION: + value = i915_perf_ioctl_version(); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 366580701ba2..220e3384af30 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4017,6 +4017,16 @@ void i915_perf_fini(struct drm_i915_private *i915) perf->i915 = NULL; } +/** + * i915_perf_ioctl_version - Version of the i915-perf subsystem + * + * This version number is used by userspace to detect available features. + */ +int i915_perf_ioctl_version(void) +{ + return 1; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_perf.c" #endif diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 98e16be48a73..4ceebce72060 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -22,6 +22,7 @@ void i915_perf_init(struct drm_i915_private *i915); void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); +int i915_perf_ioctl_version(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 30c542144016..c50c712b3771 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait { * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. */ #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 + +/* + * Revision of the i915-perf uAPI. The value returned helps determine what + * i915-perf features are available. See drm_i915_perf_property_id. + */ +#define I915_PARAM_PERF_REVISION 54 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1844,23 +1851,31 @@ enum drm_i915_perf_property_id { * Open the stream for a specific context handle (as used with * execbuffer2). A stream opened for a specific context this way * won't typically require root privileges. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_CTX_HANDLE = 1, /** * A value of 1 requests the inclusion of raw OA unit reports as * part of stream samples. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_SAMPLE_OA, /** * The value specifies which set of OA unit metrics should be * be configured, defining the contents of any OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_METRICS_SET, /** * The value specifies the size and layout of OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_FORMAT, @@ -1870,6 +1885,8 @@ enum drm_i915_perf_property_id { * from this exponent as follows: * * 80ns * 2^(period_exponent + 1) + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_EXPONENT, @@ -1901,6 +1918,8 @@ struct drm_i915_perf_open_param { * to close and re-open a stream with the same configuration. * * It's undefined whether any pending data for the stream will be lost. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) @@ -1908,6 +1927,8 @@ struct drm_i915_perf_open_param { * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) -- cgit v1.2.3 From 4f6ccc74a85cbb4cdd373c374dc76398dc7603a1 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 14 Oct 2019 21:14:02 +0100 Subject: drm/i915: add support for perf configuration queries Listing configurations at the moment is supported only through sysfs. This might cause issues for applications wanting to list configurations from a container where sysfs isn't available. This change adds a way to query the number of configurations and their content through the i915 query uAPI. v2: Fix sparse warnings (Lionel) Add support to query configuration using uuid (Lionel) v3: Fix some inconsistency in uapi header (Lionel) Fix unlocking when not locked issue (Lionel) Add debug messages (Lionel) v4: Fix missing unlock (Dan) v5: Drop lock when copying config content to userspace (Chris) v6: Drop lock when copying config list to userspace (Chris) Fix deadlock when calling i915_perf_get_oa_config() under perf.metrics_lock (Lionel) Add i915_oa_config_get() (Chris) Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/i915_query.c | 296 ++++++++++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 62 +++++++- 3 files changed, 358 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 220e3384af30..109782ea30ad 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3806,8 +3806,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, GEM_BUG_ON(*arg != oa_config->id); - sysfs_remove_group(perf->metrics_kobj, - &oa_config->sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); idr_remove(&perf->metrics_idr, *arg); diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index abac5042da2b..c27cfef9281c 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -7,6 +7,7 @@ #include #include "i915_drv.h" +#include "i915_perf.h" #include "i915_query.h" #include @@ -140,10 +141,305 @@ query_engine_info(struct drm_i915_private *i915, return len; } +static int can_copy_perf_config_registers_or_number(u32 user_n_regs, + u64 user_regs_ptr, + u32 kernel_n_regs) +{ + /* + * We'll just put the number of registers, and won't copy the + * register. + */ + if (user_n_regs == 0) + return 0; + + if (user_n_regs < kernel_n_regs) + return -EINVAL; + + if (!access_ok(u64_to_user_ptr(user_regs_ptr), + 2 * sizeof(u32) * kernel_n_regs)) + return -EFAULT; + + return 0; +} + +static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs, + u32 kernel_n_regs, + u64 user_regs_ptr, + u32 *user_n_regs) +{ + u32 r; + + if (*user_n_regs == 0) { + *user_n_regs = kernel_n_regs; + return 0; + } + + *user_n_regs = kernel_n_regs; + + for (r = 0; r < kernel_n_regs; r++) { + u32 __user *user_reg_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2); + u32 __user *user_val_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 + + sizeof(u32)); + int ret; + + ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr), + user_reg_ptr); + if (ret) + return -EFAULT; + + ret = __put_user(kernel_regs[r].value, user_val_ptr); + if (ret) + return -EFAULT; + } + + return 0; +} + +static int query_perf_config_data(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item, + bool use_uuid) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct drm_i915_perf_oa_config __user *user_config_ptr = + u64_to_user_ptr(query_item->data_ptr + + sizeof(struct drm_i915_query_perf_config)); + struct drm_i915_perf_oa_config user_config; + struct i915_perf *perf = &i915->perf; + struct i915_oa_config *oa_config; + char uuid[UUID_STRING_LEN + 1]; + u64 config_id; + u32 flags, total_size; + int ret; + + if (!perf->i915) + return -ENODEV; + + total_size = + sizeof(struct drm_i915_query_perf_config) + + sizeof(struct drm_i915_perf_oa_config); + + if (query_item->length == 0) + return total_size; + + if (query_item->length < total_size) { + DRM_DEBUG("Invalid query config data item size=%u expected=%u\n", + query_item->length, total_size); + return -EINVAL; + } + + if (!access_ok(user_query_config_ptr, total_size)) + return -EFAULT; + + if (__get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + if (use_uuid) { + struct i915_oa_config *tmp; + int id; + + BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid)); + + memset(&uuid, 0, sizeof(uuid)); + if (__copy_from_user(uuid, user_query_config_ptr->uuid, + sizeof(user_query_config_ptr->uuid))) + return -EFAULT; + + oa_config = NULL; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, uuid)) { + oa_config = i915_oa_config_get(tmp); + break; + } + } + rcu_read_unlock(); + } else { + if (__get_user(config_id, &user_query_config_ptr->config)) + return -EFAULT; + + oa_config = i915_perf_get_oa_config(perf, config_id); + } + if (!oa_config) + return -ENOENT; + + if (__copy_from_user(&user_config, user_config_ptr, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs, + user_config.boolean_regs_ptr, + oa_config->b_counter_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs, + user_config.flex_regs_ptr, + oa_config->flex_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs, + user_config.mux_regs_ptr, + oa_config->mux_regs_len); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs, + oa_config->b_counter_regs_len, + user_config.boolean_regs_ptr, + &user_config.n_boolean_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->flex_regs, + oa_config->flex_regs_len, + user_config.flex_regs_ptr, + &user_config.n_flex_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->mux_regs, + oa_config->mux_regs_len, + user_config.mux_regs_ptr, + &user_config.n_mux_regs); + if (ret) + goto out; + + memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid)); + + if (__copy_to_user(user_config_ptr, &user_config, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = total_size; + +out: + i915_oa_config_put(oa_config); + return ret; +} + +static size_t sizeof_perf_config_list(size_t count) +{ + return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count; +} + +static size_t sizeof_perf_metrics(struct i915_perf *perf) +{ + struct i915_oa_config *tmp; + size_t i; + int id; + + i = 1; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) + i++; + rcu_read_unlock(); + + return sizeof_perf_config_list(i); +} + +static int query_perf_config_list(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct i915_perf *perf = &i915->perf; + u64 *oa_config_ids = NULL; + int alloc, n_configs; + u32 flags; + int ret; + + if (!perf->i915) + return -ENODEV; + + if (query_item->length == 0) + return sizeof_perf_metrics(perf); + + if (get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + n_configs = 1; + do { + struct i915_oa_config *tmp; + u64 *ids; + int id; + + ids = krealloc(oa_config_ids, + n_configs * sizeof(*oa_config_ids), + GFP_KERNEL); + if (!ids) + return -ENOMEM; + + alloc = fetch_and_zero(&n_configs); + + ids[n_configs++] = 1ull; /* reserved for test_config */ + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (n_configs < alloc) + ids[n_configs] = id; + n_configs++; + } + rcu_read_unlock(); + + oa_config_ids = ids; + } while (n_configs > alloc); + + if (query_item->length < sizeof_perf_config_list(n_configs)) { + DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n", + query_item->length, + sizeof_perf_config_list(n_configs)); + kfree(oa_config_ids); + return -EINVAL; + } + + if (put_user(n_configs, &user_query_config_ptr->config)) { + kfree(oa_config_ids); + return -EFAULT; + } + + ret = copy_to_user(user_query_config_ptr + 1, + oa_config_ids, + n_configs * sizeof(*oa_config_ids)); + kfree(oa_config_ids); + if (ret) + return -EFAULT; + + return sizeof_perf_config_list(n_configs); +} + +static int query_perf_config(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + switch (query_item->flags) { + case DRM_I915_QUERY_PERF_CONFIG_LIST: + return query_perf_config_list(i915, query_item); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID: + return query_perf_config_data(i915, query_item, true); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID: + return query_perf_config_data(i915, query_item, false); + default: + return -EINVAL; + } +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, + query_perf_config, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c50c712b3771..0c7b2815fbf1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2005,6 +2005,7 @@ struct drm_i915_query_item { __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 #define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 /* Must be kept compact -- no holes and well documented */ /* @@ -2016,9 +2017,18 @@ struct drm_i915_query_item { __s32 length; /* - * Unused for now. Must be cleared to zero. + * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * + * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the + * following : + * - DRM_I915_QUERY_PERF_CONFIG_LIST + * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID */ __u32 flags; +#define DRM_I915_QUERY_PERF_CONFIG_LIST 1 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 /* * Data will be written at the location pointed by data_ptr when the @@ -2146,6 +2156,56 @@ struct drm_i915_query_engine_info { struct drm_i915_engine_info engines[]; }; +/* + * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG. + */ +struct drm_i915_query_perf_config { + union { + /* + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets + * this fields to the number of configurations available. + */ + __u64 n_configs; + + /* + * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, + * i915 will use the value in this field as configuration + * identifier to decide what data to write into config_ptr. + */ + __u64 config; + + /* + * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, + * i915 will use the value in this field as configuration + * identifier to decide what data to write into config_ptr. + * + * String formatted like "%08x-%04x-%04x-%04x-%012x" + */ + char uuid[36]; + }; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will + * write an array of __u64 of configuration identifiers. + * + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will + * write a struct drm_i915_perf_oa_config. If the following fields of + * drm_i915_perf_oa_config are set not set to 0, i915 will write into + * the associated pointers the values of submitted when the + * configuration was created : + * + * - n_mux_regs + * - n_boolean_regs + * - n_flex_regs + */ + __u8 data[]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 7831e9a965ea2ca91855995d62197bc8078bb762 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Oct 2019 21:14:03 +0100 Subject: drm/i915/perf: Allow dynamic reconfiguration of the OA stream Introduce a new perf_ioctl command to change the OA configuration of the active stream. This allows the OA stream to be reconfigured between batch buffers, giving greater flexibility in sampling. We inject a request into the OA context to reconfigure the stream asynchronously on the GPU in between and ordered with execbuffer calls. Original patch for dynamic reconfiguration by Lionel Landwerlin. Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Chris Wilson Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 46 +++++++++++++++++++++++++++++++++++++++- include/uapi/drm/i915_drm.h | 13 ++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 109782ea30ad..372c91f6a28e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2862,6 +2862,40 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) stream->ops->disable(stream); } +static long i915_perf_config_locked(struct i915_perf_stream *stream, + unsigned long metrics_set) +{ + struct i915_oa_config *config; + long ret = stream->oa_config->id; + + config = i915_perf_get_oa_config(stream->perf, metrics_set); + if (!config) + return -EINVAL; + + if (config != stream->oa_config) { + int err; + + /* + * If OA is bound to a specific context, emit the + * reconfiguration inline from that context. The update + * will then be ordered with respect to submission on that + * context. + * + * When set globally, we use a low priority kernel context, + * so it will effectively take effect when idle. + */ + err = emit_oa_config(stream, oa_context(stream)); + if (err == 0) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + i915_oa_config_put(config); + + return ret; +} + /** * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs * @stream: An i915 perf stream @@ -2885,6 +2919,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, case I915_PERF_IOCTL_DISABLE: i915_perf_disable_locked(stream); return 0; + case I915_PERF_IOCTL_CONFIG: + return i915_perf_config_locked(stream, arg); } return -EINVAL; @@ -4023,7 +4059,15 @@ void i915_perf_fini(struct drm_i915_private *i915) */ int i915_perf_ioctl_version(void) { - return 1; + /* + * 1: Initial version + * I915_PERF_IOCTL_ENABLE + * I915_PERF_IOCTL_DISABLE + * + * 2: Added runtime modification of OA config. + * I915_PERF_IOCTL_CONFIG + */ + return 2; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0c7b2815fbf1..b008ce8b4e6f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1932,6 +1932,19 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) +/** + * Change metrics_set captured by a stream. + * + * If the stream is bound to a specific context, the configuration change + * will performed inline with that context such that it takes effect before + * the next execbuf submission. + * + * Returns the previously bound metrics set id, or a negative error code. + * + * This ioctl is available in perf revision 2. + */ +#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) + /** * Common to all i915 perf records */ -- cgit v1.2.3 From 9cd20ef7803cc53a00c6eb7198b3d870ac7b3766 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 14 Oct 2019 21:14:04 +0100 Subject: drm/i915/perf: allow holding preemption on filtered ctx We would like to make use of perf in Vulkan. The Vulkan API is much lower level than OpenGL, with applications directly exposed to the concept of command buffers (pretty much equivalent to our batch buffers). In Vulkan, queries are always limited in scope to a command buffer. In OpenGL, the lack of command buffer concept meant that queries' duration could span multiple command buffers. With that restriction gone in Vulkan, we would like to simplify measuring performance just by measuring the deltas between the counter snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the more complex scheme we currently have in the GL driver, using 2 MI_RECORD_PERF_COUNT commands and doing some post processing on the stream of OA reports, coming from the global OA buffer, to remove any unrelated deltas in between the 2 MI_RECORD_PERF_COUNT. Disabling preemption only apply to a single context with which want to query performance counters for and is considered a privileged operation, by default protected by CAP_SYS_ADMIN. It is possible to enable it for a normal user by disabling the paranoid stream setting. v2: Store preemption setting in intel_context (Chris) v3: Use priorities to avoid preemption rather than the HW mechanism v4: Just modify the port priority reporting function v5: Add nopreempt flag on gem context and always flag requests appropriately, regarless of OA reconfiguration. Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.h | 18 ++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 ++ drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++++++++-- drivers/gpu/drm/i915/i915_perf_types.h | 8 ++++++ include/uapi/drm/i915_drm.h | 11 ++++++++ 6 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 9234586830d1..cfe80590f0ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -114,6 +114,24 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } +static inline bool +i915_gem_context_nopreempt(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + +static inline void +i915_gem_context_set_nopreempt(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + +static inline void +i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx) +{ + clear_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) { return !ctx->file_priv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index ab8e1367dfc8..fe97b8ba4fda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -146,6 +146,7 @@ struct i915_gem_context { #define CONTEXT_CLOSED 1 #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 #define CONTEXT_USER_ENGINES 3 +#define CONTEXT_NOPREEMPT 4 struct mutex mutex; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 580c15e74a0a..98cd92320afb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2079,6 +2079,9 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; + if (i915_gem_context_nopreempt(eb->gem_context)) + eb->request->flags |= I915_REQUEST_NOPREEMPT; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 372c91f6a28e..54ec1c4190ac 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -344,6 +344,8 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * struct perf_open_properties - for validated properties given to open a stream * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags * @single_context: Whether a single or all gpu contexts should be monitored + * @hold_preemption: Whether the preemption is disabled for the filtered + * context * @ctx_handle: A gem ctx handle for use with @single_context * @metrics_set: An ID for an OA unit metric set advertised via sysfs * @oa_format: An OA unit HW report format @@ -359,6 +361,7 @@ struct perf_open_properties { u32 sample_flags; u64 single_context:1; + u64 hold_preemption:1; u64 ctx_handle; /* OA sampling state */ @@ -2514,6 +2517,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (WARN_ON(stream->oa_buffer.format_size == 0)) return -EINVAL; + stream->hold_preemption = props->hold_preemption; + stream->oa_buffer.format = perf->oa_formats[props->oa_format].format; @@ -2834,6 +2839,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) if (stream->ops->enable) stream->ops->enable(stream); + + if (stream->hold_preemption) + i915_gem_context_set_nopreempt(stream->ctx); } /** @@ -2858,6 +2866,9 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) /* Allow stream->ops->disable() to refer to this */ stream->enabled = false; + if (stream->hold_preemption) + i915_gem_context_clear_nopreempt(stream->ctx); + if (stream->ops->disable) stream->ops->disable(stream); } @@ -3067,6 +3078,15 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, } } + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -3081,7 +3101,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. */ - if (IS_HASWELL(perf->i915) && specific_ctx) + if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option @@ -3091,7 +3111,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, */ if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { - DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); + DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); ret = -EACCES; goto err_ctx; } @@ -3293,6 +3313,9 @@ static int read_properties_unlocked(struct i915_perf *perf, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_HOLD_PREEMPTION: + props->hold_preemption = !!value; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -4066,8 +4089,13 @@ int i915_perf_ioctl_version(void) * * 2: Added runtime modification of OA config. * I915_PERF_IOCTL_CONFIG + * + * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold + * preemption on a particular context so that performance data is + * accessible from a delta of MI_RPC reports without looking at the + * OA buffer. */ - return 2; + return 3; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index d35a3c1946c3..a1f733fc905a 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -170,6 +170,14 @@ struct i915_perf_stream { */ bool enabled; + /** + * @hold_preemption: Whether preemption is put on hold for command + * submissions done on the @ctx. This is useful for some drivers that + * cannot easily post process the OA buffer context to subtract delta + * of performance counters not associated with @ctx. + */ + bool hold_preemption; + /** * @ops: The callbacks providing the implementation of this specific * type of configured stream. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b008ce8b4e6f..63d40cba97e0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1890,6 +1890,17 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_OA_EXPONENT, + /** + * Specifying this property is only valid when specify a context to + * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property + * will hold preemption of the particular context we want to gather + * performance data about. The execbuf2 submissions must include a + * drm_i915_gem_execbuffer_ext_perf parameter for this to apply. + * + * This property is available in perf revision 3. + */ + DRM_I915_PERF_PROP_HOLD_PREEMPTION, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; -- cgit v1.2.3 From 2093dea3def9d5bf3000697ae3b0ec36c43354e0 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 30 Jul 2019 21:02:08 +0800 Subject: drm/syncobj: extend syncobj query ability v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit user space needs a flexiable query ability. So that umd can get last signaled or submitted point. v2: add sanitizer checking. v3: rebase Change-Id: I6512b430524ebabe715e602a2bf5abb0a7e780ea Signed-off-by: Chunming Zhou Cc: Lionel Landwerlin Cc: Christian König Reviewed-by: Lionel Landwerlin Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/series/64044/ --- drivers/gpu/drm/drm_syncobj.c | 37 ++++++++++++++++++++++--------------- include/uapi/drm/drm.h | 3 ++- 2 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 9ec334663c2d..669c93fe2500 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1280,7 +1280,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) return -EOPNOTSUPP; - if (args->pad != 0) + if (args->flags != 0) return -EINVAL; if (args->count_handles == 0) @@ -1351,7 +1351,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) return -EOPNOTSUPP; - if (args->pad != 0) + if (args->flags & ~DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED) return -EINVAL; if (args->count_handles == 0) @@ -1372,25 +1372,32 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, fence = drm_syncobj_fence_get(syncobjs[i]); chain = to_dma_fence_chain(fence); if (chain) { - struct dma_fence *iter, *last_signaled = NULL; - - dma_fence_chain_for_each(iter, fence) { - if (iter->context != fence->context) { - dma_fence_put(iter); - /* It is most likely that timeline has - * unorder points. */ - break; + struct dma_fence *iter, *last_signaled = + dma_fence_get(fence); + + if (args->flags & + DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED) { + point = fence->seqno; + } else { + dma_fence_chain_for_each(iter, fence) { + if (iter->context != fence->context) { + dma_fence_put(iter); + /* It is most likely that timeline has + * unorder points. */ + break; + } + dma_fence_put(last_signaled); + last_signaled = dma_fence_get(iter); } - dma_fence_put(last_signaled); - last_signaled = dma_fence_get(iter); + point = dma_fence_is_signaled(last_signaled) ? + last_signaled->seqno : + to_dma_fence_chain(last_signaled)->prev_seqno; } - point = dma_fence_is_signaled(last_signaled) ? - last_signaled->seqno : - to_dma_fence_chain(last_signaled)->prev_seqno; dma_fence_put(last_signaled); } else { point = 0; } + dma_fence_put(fence); ret = copy_to_user(&points[i], &point, sizeof(uint64_t)); ret = ret ? -EFAULT : 0; if (ret) diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 8a5b2f8f8eb9..868bf7996c0f 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -778,11 +778,12 @@ struct drm_syncobj_array { __u32 pad; }; +#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */ struct drm_syncobj_timeline_array { __u64 handles; __u64 points; __u32 count_handles; - __u32 pad; + __u32 flags; }; -- cgit v1.2.3 From 2f77d82e7ee416b51771cf022f23921b44aaaec3 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Fri, 18 Oct 2019 13:50:41 -0400 Subject: drm/fourcc: Fix undefined left shift in DRM_FORMAT_BIG_ENDIAN macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1<<31 is undefined because it's a signed int and C is terrible. Reviewed-by: Eric Engestrom Signed-off-by: Adam Jackson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191018175041.613780-1-ajax@redhat.com --- include/uapi/drm/drm_fourcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 2376d36ea573..8caaaf7ff91b 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -69,7 +69,7 @@ extern "C" { #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \ ((__u32)(c) << 16) | ((__u32)(d) << 24)) -#define DRM_FORMAT_BIG_ENDIAN (1<<31) /* format is big endian instead of little endian */ +#define DRM_FORMAT_BIG_ENDIAN (1U<<31) /* format is big endian instead of little endian */ /* Reserve 0 for the invalid format specifier */ #define DRM_FORMAT_INVALID 0 -- cgit v1.2.3 From a96bf3cbd7b8557f5c5c7938e5f8926ea39d55e9 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 22 Oct 2019 16:47:29 -0400 Subject: Revert "drm/omap: add OMAP_BO flags to affect buffer allocation" This reverts commit 23b482252836ab3c5e6b3b20ed3038449cbc7679. This patch does not have an acceptable open source userspace implementation, and as such it does not meet the requirements for adding new UAPI. Discussion is in the Link. Link: https://lists.freedesktop.org/archives/dri-devel/2019-October/240586.html Fixes: 23b482252836 ("drm/omap: add OMAP_BO flags to affect buffer allocation") Cc: Tomi Valkeinen Cc: Jean-Jacques Hiblot Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Acked-by: Tomi Valkeinen Acked-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20191022204733.235801-1-sean@poorly.run --- drivers/gpu/drm/omapdrm/omap_gem.c | 54 ++------------------------------------ include/uapi/drm/omap_drm.h | 9 ------- 2 files changed, 2 insertions(+), 61 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index bf18dfe2b689..e518d93ca6df 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1097,9 +1097,6 @@ void omap_gem_free_object(struct drm_gem_object *obj) list_del(&omap_obj->mm_list); mutex_unlock(&priv->list_lock); - if (omap_obj->flags & OMAP_BO_MEM_PIN) - omap_gem_unpin_locked(obj); - /* * We own the sole reference to the object at this point, but to keep * lockdep happy, we must still take the omap_obj_lock to call @@ -1150,19 +1147,10 @@ static bool omap_gem_validate_flags(struct drm_device *dev, u32 flags) return false; } - if ((flags & OMAP_BO_MEM_CONTIG) && (flags & OMAP_BO_MEM_DMM)) - return false; - - if ((flags & OMAP_BO_MEM_DMM) && !priv->usergart) - return false; - if (flags & OMAP_BO_TILED_MASK) { if (!priv->usergart) return false; - if (flags & OMAP_BO_MEM_CONTIG) - return false; - switch (flags & OMAP_BO_TILED_MASK) { case OMAP_BO_TILED_8: case OMAP_BO_TILED_16: @@ -1177,34 +1165,7 @@ static bool omap_gem_validate_flags(struct drm_device *dev, u32 flags) return true; } -/** - * omap_gem_new() - Create a new GEM buffer - * @dev: The DRM device - * @gsize: The requested size for the GEM buffer. If the buffer is tiled - * (2D buffer), the size is a pair of values: height and width - * expressed in pixels. If the buffers is not tiled, it is expressed - * in bytes. - * @flags: Flags give additionnal information about the allocation: - * OMAP_BO_TILED_x: use the TILER (2D buffers). The TILER container - * unit can be 8, 16 or 32 bits. Cache is always disabled for - * tiled buffers. - * OMAP_BO_SCANOUT: Scannout buffer, consummable by the DSS - * OMAP_BO_CACHED: Buffer CPU caching mode: cached - * OMAP_BO_WC: Buffer CPU caching mode: write-combined - * OMAP_BO_UNCACHED: Buffer CPU caching mode: uncached - * OMAP_BO_MEM_CONTIG: The driver will use dma_alloc to get the memory. - * This can be used to avoid DMM if the userspace knows it needs - * more than 128M of memory at the same time. - * OMAP_BO_MEM_DMM: The driver will use DMM to get the memory. There's - * not much use for this flag at the moment, as on platforms with - * DMM it is used by default, but it's here for completeness. - * OMAP_BO_MEM_PIN: The driver will pin the memory at alloc time, and - * keep it pinned. This can be used to 1) get an error at alloc - * time if DMM space is full, and 2) get rid of the constant - * pin/unpin operations which may have some effect on performance. - * - * Return: The GEM buffer or NULL if the allocation failed - */ +/* GEM buffer object constructor */ struct drm_gem_object *omap_gem_new(struct drm_device *dev, union omap_gem_size gsize, u32 flags) { @@ -1232,8 +1193,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, */ flags &= ~(OMAP_BO_CACHED|OMAP_BO_WC|OMAP_BO_UNCACHED); flags |= tiler_get_cpu_cache_flags(); - } else if ((flags & OMAP_BO_MEM_CONTIG) || - ((flags & OMAP_BO_SCANOUT) && !priv->has_dmm)) { + } else if ((flags & OMAP_BO_SCANOUT) && !priv->has_dmm) { /* * If we don't have DMM, we must allocate scanout buffers * from contiguous DMA memory. @@ -1293,22 +1253,12 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, goto err_release; } - if (flags & OMAP_BO_MEM_PIN) { - ret = omap_gem_pin(obj, NULL); - if (ret) - goto err_free_dma; - } - mutex_lock(&priv->list_lock); list_add(&omap_obj->mm_list, &priv->obj_list); mutex_unlock(&priv->list_lock); return obj; -err_free_dma: - if (flags & OMAP_BO_MEM_DMA_API) - dma_free_wc(dev->dev, size, omap_obj->vaddr, - omap_obj->dma_addr); err_release: drm_gem_object_release(obj); err_free: diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 842d3180a442..5a142fad473c 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -47,15 +47,6 @@ struct drm_omap_param { #define OMAP_BO_UNCACHED 0x00000004 #define OMAP_BO_CACHE_MASK 0x00000006 -/* Force allocation from contiguous DMA memory */ -#define OMAP_BO_MEM_CONTIG 0x00000008 - -/* Force allocation via DMM */ -#define OMAP_BO_MEM_DMM 0x00000010 - -/* Pin the buffer when allocating and keep pinned */ -#define OMAP_BO_MEM_PIN 0x00000020 - /* Use TILER for the buffer. The TILER container unit can be 8, 16 or 32 bits. */ #define OMAP_BO_TILED_8 0x00000100 #define OMAP_BO_TILED_16 0x00000200 -- cgit v1.2.3 From 3f4bb9f75090fc82229431a8e4a3c293b6094f2e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 24 Oct 2019 17:17:37 +0200 Subject: drm: Spelling s/connet/connect/ Fix misspellings of "connector" and "connection" Signed-off-by: Geert Uytterhoeven Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20191024151737.29287-1-geert+renesas@glider.be --- drivers/gpu/drm/gma500/mdfld_dsi_output.c | 2 +- include/uapi/drm/exynos_drm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c index 03023fa0fb6f..f350ac1ead18 100644 --- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c +++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c @@ -498,7 +498,7 @@ void mdfld_dsi_output_init(struct drm_device *dev, return; } - /*create a new connetor*/ + /*create a new connector*/ dsi_connector = kzalloc(sizeof(struct mdfld_dsi_connector), GFP_KERNEL); if (!dsi_connector) { DRM_ERROR("No memory"); diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index 3e59b8382dd8..45c6582b3df3 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -68,7 +68,7 @@ struct drm_exynos_gem_info { /** * A structure for user connection request of virtual display. * - * @connection: indicate whether doing connetion or not by user. + * @connection: indicate whether doing connection or not by user. * @extensions: if this value is 1 then the vidi driver would need additional * 128bytes edid data. * @edid: the edid data pointer from user side. -- cgit v1.2.3 From a69b0e855d3fd278ff6f09a23e1edf929538e304 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Mon, 21 Oct 2019 19:03:06 +0000 Subject: dma-buf: Add dma-buf heaps framework This framework allows a unified userspace interface for dma-buf exporters, allowing userland to allocate specific types of memory for use in dma-buf sharing. Each heap is given its own device node, which a user can allocate a dma-buf fd from using the DMA_HEAP_IOC_ALLOC. This code is an evoluiton of the Android ION implementation, and a big thanks is due to its authors/maintainers over time for their effort: Rebecca Schultz Zavin, Colin Cross, Benjamin Gaignard, Laura Abbott, and many other contributors! Cc: Laura Abbott Cc: Benjamin Gaignard Cc: Sumit Semwal Cc: Liam Mark Cc: Pratik Patel Cc: Brian Starkey Cc: Vincent Donnefort Cc: Sudipto Paul Cc: Andrew F. Davis Cc: Christoph Hellwig Cc: Chenbo Feng Cc: Alistair Strachan Cc: Hridya Valsaraju Cc: Hillf Danton Cc: dri-devel@lists.freedesktop.org Reviewed-by: Benjamin Gaignard Reviewed-by: Brian Starkey Acked-by: Laura Abbott Tested-by: Ayan Kumar Halder Signed-off-by: Andrew F. Davis Signed-off-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20191021190310.85221-2-john.stultz@linaro.org --- MAINTAINERS | 18 +++ drivers/dma-buf/Kconfig | 9 ++ drivers/dma-buf/Makefile | 1 + drivers/dma-buf/dma-heap.c | 269 ++++++++++++++++++++++++++++++++++++++++++ include/linux/dma-heap.h | 59 +++++++++ include/uapi/linux/dma-heap.h | 55 +++++++++ 6 files changed, 411 insertions(+) create mode 100644 drivers/dma-buf/dma-heap.c create mode 100644 include/linux/dma-heap.h create mode 100644 include/uapi/linux/dma-heap.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index b63c291ad029..5f39067caa7c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4942,6 +4942,24 @@ F: include/linux/*fence.h F: Documentation/driver-api/dma-buf.rst T: git git://anongit.freedesktop.org/drm/drm-misc +DMA-BUF HEAPS FRAMEWORK +M: Sumit Semwal +R: Andrew F. Davis +R: Benjamin Gaignard +R: Liam Mark +R: Laura Abbott +R: Brian Starkey +R: John Stultz +S: Maintained +L: linux-media@vger.kernel.org +L: dri-devel@lists.freedesktop.org +L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers) +F: include/uapi/linux/dma-heap.h +F: include/linux/dma-heap.h +F: drivers/dma-buf/dma-heap.c +F: drivers/dma-buf/heaps/* +T: git git://anongit.freedesktop.org/drm/drm-misc + DMA GENERIC OFFLOAD ENGINE SUBSYSTEM M: Vinod Koul L: dmaengine@vger.kernel.org diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index a23b6752d11a..bffa58fc3e6e 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -44,4 +44,13 @@ config DMABUF_SELFTESTS default n depends on DMA_SHARED_BUFFER +menuconfig DMABUF_HEAPS + bool "DMA-BUF Userland Memory Heaps" + select DMA_SHARED_BUFFER + help + Choose this option to enable the DMA-BUF userland memory heaps. + This options creates per heap chardevs in /dev/dma_heap/ which + allows userspace to allocate dma-bufs that can be shared + between drivers. + endmenu diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 03479da06422..caee5eb3d351 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ dma-resv.o seqno-fence.o +obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o obj-$(CONFIG_SYNC_FILE) += sync_file.o obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o obj-$(CONFIG_UDMABUF) += udmabuf.o diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c new file mode 100644 index 000000000000..9a41b73e54b4 --- /dev/null +++ b/drivers/dma-buf/dma-heap.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Framework for userspace DMA-BUF allocations + * + * Copyright (C) 2011 Google, Inc. + * Copyright (C) 2019 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEVNAME "dma_heap" + +#define NUM_HEAP_MINORS 128 + +/** + * struct dma_heap - represents a dmabuf heap in the system + * @name: used for debugging/device-node name + * @ops: ops struct for this heap + * @minor minor number of this heap device + * @heap_devt heap device node + * @heap_cdev heap char device + * + * Represents a heap of memory from which buffers can be made. + */ +struct dma_heap { + const char *name; + const struct dma_heap_ops *ops; + void *priv; + unsigned int minor; + dev_t heap_devt; + struct list_head list; + struct cdev heap_cdev; +}; + +static LIST_HEAD(heap_list); +static DEFINE_MUTEX(heap_list_lock); +static dev_t dma_heap_devt; +static struct class *dma_heap_class; +static DEFINE_XARRAY_ALLOC(dma_heap_minors); + +static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, + unsigned int fd_flags, + unsigned int heap_flags) +{ + /* + * Allocations from all heaps have to begin + * and end on page boundaries. + */ + len = PAGE_ALIGN(len); + if (!len) + return -EINVAL; + + return heap->ops->allocate(heap, len, fd_flags, heap_flags); +} + +static int dma_heap_open(struct inode *inode, struct file *file) +{ + struct dma_heap *heap; + + heap = xa_load(&dma_heap_minors, iminor(inode)); + if (!heap) { + pr_err("dma_heap: minor %d unknown.\n", iminor(inode)); + return -ENODEV; + } + + /* instance data as context */ + file->private_data = heap; + nonseekable_open(inode, file); + + return 0; +} + +static long dma_heap_ioctl_allocate(struct file *file, unsigned long arg) +{ + struct dma_heap_allocation_data heap_allocation; + struct dma_heap *heap = file->private_data; + int fd; + + if (copy_from_user(&heap_allocation, (void __user *)arg, + sizeof(heap_allocation))) + return -EFAULT; + + if (heap_allocation.fd || + heap_allocation.reserved0 || + heap_allocation.reserved1) { + pr_warn_once("dma_heap: ioctl data not valid\n"); + return -EINVAL; + } + + if (heap_allocation.fd_flags & ~DMA_HEAP_VALID_FD_FLAGS) { + pr_warn_once("dma_heap: fd_flags has invalid or unsupported flags set\n"); + return -EINVAL; + } + + if (heap_allocation.heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS) { + pr_warn_once("dma_heap: heap flags has invalid or unsupported flags set\n"); + return -EINVAL; + } + + fd = dma_heap_buffer_alloc(heap, heap_allocation.len, + heap_allocation.fd_flags, + heap_allocation.heap_flags); + if (fd < 0) + return fd; + + heap_allocation.fd = fd; + + if (copy_to_user((void __user *)arg, &heap_allocation, + sizeof(heap_allocation))) { + ksys_close(fd); + return -EFAULT; + } + + return 0; +} + +static long dma_heap_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case DMA_HEAP_IOC_ALLOC: + ret = dma_heap_ioctl_allocate(file, arg); + break; + default: + return -ENOTTY; + } + + return ret; +} + +static const struct file_operations dma_heap_fops = { + .owner = THIS_MODULE, + .open = dma_heap_open, + .unlocked_ioctl = dma_heap_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = dma_heap_ioctl, +#endif +}; + +/** + * dma_heap_get_drvdata() - get per-subdriver data for the heap + * @heap: DMA-Heap to retrieve private data for + * + * Returns: + * The per-subdriver data for the heap. + */ +void *dma_heap_get_drvdata(struct dma_heap *heap) +{ + return heap->priv; +} + +struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) +{ + struct dma_heap *heap, *h, *err_ret; + struct device *dev_ret; + int ret; + + if (!exp_info->name || !strcmp(exp_info->name, "")) { + pr_err("dma_heap: Cannot add heap without a name\n"); + return ERR_PTR(-EINVAL); + } + + if (!exp_info->ops || !exp_info->ops->allocate) { + pr_err("dma_heap: Cannot add heap with invalid ops struct\n"); + return ERR_PTR(-EINVAL); + } + + /* check the name is unique */ + mutex_lock(&heap_list_lock); + list_for_each_entry(h, &heap_list, list) { + if (!strcmp(h->name, exp_info->name)) { + mutex_unlock(&heap_list_lock); + pr_err("dma_heap: Already registered heap named %s\n", + exp_info->name); + return ERR_PTR(-EINVAL); + } + } + mutex_unlock(&heap_list_lock); + + heap = kzalloc(sizeof(*heap), GFP_KERNEL); + if (!heap) + return ERR_PTR(-ENOMEM); + + heap->name = exp_info->name; + heap->ops = exp_info->ops; + heap->priv = exp_info->priv; + + /* Find unused minor number */ + ret = xa_alloc(&dma_heap_minors, &heap->minor, heap, + XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL); + if (ret < 0) { + pr_err("dma_heap: Unable to get minor number for heap\n"); + err_ret = ERR_PTR(ret); + goto err0; + } + + /* Create device */ + heap->heap_devt = MKDEV(MAJOR(dma_heap_devt), heap->minor); + + cdev_init(&heap->heap_cdev, &dma_heap_fops); + ret = cdev_add(&heap->heap_cdev, heap->heap_devt, 1); + if (ret < 0) { + pr_err("dma_heap: Unable to add char device\n"); + err_ret = ERR_PTR(ret); + goto err1; + } + + dev_ret = device_create(dma_heap_class, + NULL, + heap->heap_devt, + NULL, + heap->name); + if (IS_ERR(dev_ret)) { + pr_err("dma_heap: Unable to create device\n"); + err_ret = ERR_CAST(dev_ret); + goto err2; + } + /* Add heap to the list */ + mutex_lock(&heap_list_lock); + list_add(&heap->list, &heap_list); + mutex_unlock(&heap_list_lock); + + return heap; + +err2: + cdev_del(&heap->heap_cdev); +err1: + xa_erase(&dma_heap_minors, heap->minor); +err0: + kfree(heap); + return err_ret; +} + +static char *dma_heap_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev)); +} + +static int dma_heap_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&dma_heap_devt, 0, NUM_HEAP_MINORS, DEVNAME); + if (ret) + return ret; + + dma_heap_class = class_create(THIS_MODULE, DEVNAME); + if (IS_ERR(dma_heap_class)) { + unregister_chrdev_region(dma_heap_devt, NUM_HEAP_MINORS); + return PTR_ERR(dma_heap_class); + } + dma_heap_class->devnode = dma_heap_devnode; + + return 0; +} +subsys_initcall(dma_heap_init); diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h new file mode 100644 index 000000000000..454e354d1ffb --- /dev/null +++ b/include/linux/dma-heap.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DMABUF Heaps Allocation Infrastructure + * + * Copyright (C) 2011 Google, Inc. + * Copyright (C) 2019 Linaro Ltd. + */ + +#ifndef _DMA_HEAPS_H +#define _DMA_HEAPS_H + +#include +#include + +struct dma_heap; + +/** + * struct dma_heap_ops - ops to operate on a given heap + * @allocate: allocate dmabuf and return fd + * + * allocate returns dmabuf fd on success, -errno on error. + */ +struct dma_heap_ops { + int (*allocate)(struct dma_heap *heap, + unsigned long len, + unsigned long fd_flags, + unsigned long heap_flags); +}; + +/** + * struct dma_heap_export_info - information needed to export a new dmabuf heap + * @name: used for debugging/device-node name + * @ops: ops struct for this heap + * @priv: heap exporter private data + * + * Information needed to export a new dmabuf heap. + */ +struct dma_heap_export_info { + const char *name; + const struct dma_heap_ops *ops; + void *priv; +}; + +/** + * dma_heap_get_drvdata() - get per-heap driver data + * @heap: DMA-Heap to retrieve private data for + * + * Returns: + * The per-heap data for the heap. + */ +void *dma_heap_get_drvdata(struct dma_heap *heap); + +/** + * dma_heap_add - adds a heap to dmabuf heaps + * @exp_info: information needed to register this heap + */ +struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); + +#endif /* _DMA_HEAPS_H */ diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h new file mode 100644 index 000000000000..6ce5cc68d238 --- /dev/null +++ b/include/uapi/linux/dma-heap.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * DMABUF Heaps Userspace API + * + * Copyright (C) 2011 Google, Inc. + * Copyright (C) 2019 Linaro Ltd. + */ +#ifndef _UAPI_LINUX_DMABUF_POOL_H +#define _UAPI_LINUX_DMABUF_POOL_H + +#include +#include + +/** + * DOC: DMABUF Heaps Userspace API + */ + +/* Valid FD_FLAGS are O_CLOEXEC, O_RDONLY, O_WRONLY, O_RDWR */ +#define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE) + +/* Currently no heap flags */ +#define DMA_HEAP_VALID_HEAP_FLAGS (0) + +/** + * struct dma_heap_allocation_data - metadata passed from userspace for + * allocations + * @len: size of the allocation + * @fd: will be populated with a fd which provdes the + * handle to the allocated dma-buf + * @fd_flags: file descriptor flags used when allocating + * @heap_flags: flags passed to heap + * + * Provided by userspace as an argument to the ioctl + */ +struct dma_heap_allocation_data { + __u64 len; + __u32 fd; + __u32 fd_flags; + __u64 heap_flags; + __u32 reserved0; + __u32 reserved1; +}; + +#define DMA_HEAP_IOC_MAGIC 'H' + +/** + * DOC: DMA_HEAP_IOC_ALLOC - allocate memory from pool + * + * Takes an dma_heap_allocation_data struct and returns it with the fd field + * populated with the dmabuf handle of the allocation. + */ +#define DMA_HEAP_IOC_ALLOC _IOWR(DMA_HEAP_IOC_MAGIC, 0, \ + struct dma_heap_allocation_data) + +#endif /* _UAPI_LINUX_DMABUF_POOL_H */ -- cgit v1.2.3 From a0e047156cdebbccf253768b39d7e1dbf954c449 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Oct 2019 20:23:38 +0000 Subject: drm/i915/gem: Make context persistence optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our existing behaviour is to allow contexts and their GPU requests to persist past the point of closure until the requests are complete. This allows clients to operate in a 'fire-and-forget' manner where they can setup a rendering pipeline and hand it over to the display server and immediately exit. As the rendering pipeline is kept alive until completion, the display server (or other consumer) can use the results in the future and present them to the user. The compute model is a little different. They have little to no buffer sharing between processes as their kernels tend to operate on a continuous stream, feeding the results back to the client application. These kernels operate for an indeterminate length of time, with many clients wishing that the kernel was always running for as long as they keep feeding in the data, i.e. acting like a DSP. Not all clients want this persistent "desktop" behaviour and would prefer that the contexts are cleaned up immediately upon closure. This ensures that when clients are run without hangchecking (e.g. for compute kernels of indeterminate runtime), any GPU hang or other unexpected workloads are terminated with the process and does not continue to hog resources. The default behaviour for new contexts is the legacy persistence mode, as some desktop applications are dependent upon the existing behaviour. New clients will have to opt in to immediate cleanup on context closure. If the hangchecking modparam is disabled, so is persistent context support -- all contexts will be terminated on closure. We expect this behaviour change to be welcomed by compute users, who have often been caught between a rock and a hard place. They disable hangchecking to avoid their kernels being "unfairly" declared hung, but have also experienced true hangs that the system was then unable to clean up. Naturally, this leads to bug reports. Testcase: igt/gem_ctx_persistence Link: https://github.com/intel/compute-runtime/pull/228 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Michał Winiarski Cc: Jon Bloomfield Reviewed-by: Jon Bloomfield Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Acked-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20191029202338.8841-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 50 ++++++++++++++++++++++- drivers/gpu/drm/i915/gem/i915_gem_context.h | 15 +++++++ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gem/selftests/mock_context.c | 2 + include/uapi/drm/i915_drm.h | 15 +++++++ 5 files changed, 82 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 414fc55c9dd0..cbdf2fb32636 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -438,12 +438,39 @@ static void context_close(struct i915_gem_context *ctx) * case we opt to forcibly kill off all remaining requests on * context close. */ - if (!i915_modparams.enable_hangcheck) + if (!i915_gem_context_is_persistent(ctx) || + !i915_modparams.enable_hangcheck) kill_context(ctx); i915_gem_context_put(ctx); } +static int __context_set_persistence(struct i915_gem_context *ctx, bool state) +{ + if (i915_gem_context_is_persistent(ctx) == state) + return 0; + + if (state) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915_modparams.enable_hangcheck) + return -EINVAL; + + i915_gem_context_set_persistence(ctx); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; + + i915_gem_context_clear_persistence(ctx); + } + + return 0; +} + static struct i915_gem_context * __create_context(struct drm_i915_private *i915) { @@ -478,6 +505,7 @@ __create_context(struct drm_i915_private *i915) i915_gem_context_set_bannable(ctx); i915_gem_context_set_recoverable(ctx); + __context_set_persistence(ctx, true /* cgroup hook? */); for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; @@ -634,6 +662,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); + i915_gem_context_set_persistence(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -1744,6 +1773,16 @@ err_free: return err; } +static int +set_persistence(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + if (args->size) + return -EINVAL; + + return __context_set_persistence(ctx, args->value); +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1821,6 +1860,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + ret = set_persistence(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -2273,6 +2316,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + args->size = 0; + args->value = i915_gem_context_is_persistent(ctx); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index cfe80590f0ed..18e50a769a6e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -76,6 +76,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); } +static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); +} + static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) { return test_bit(CONTEXT_BANNED, &ctx->flags); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index fe97b8ba4fda..861d7d92fe9f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -137,6 +137,7 @@ struct i915_gem_context { #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 +#define UCONTEXT_PERSISTENCE 4 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 74ddd682c9cd..29b8984f0e47 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -22,6 +22,8 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + i915_gem_context_set_persistence(ctx); + mutex_init(&ctx->engines_mutex); e = default_engines(ctx); if (IS_ERR(e)) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 63d40cba97e0..5400d7e057f1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1572,6 +1572,21 @@ struct drm_i915_gem_context_param { * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) */ #define I915_CONTEXT_PARAM_ENGINES 0xa + +/* + * I915_CONTEXT_PARAM_PERSISTENCE: + * + * Allow the context and active rendering to survive the process until + * completion. Persistence allows fire-and-forget clients to queue up a + * bunch of work, hand the output over to a display server and then quit. + * If the context is marked as not persistent, upon closing (either via + * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure + * or process termination), the context and any outstanding requests will be + * cancelled (and exported fences for cancelled requests marked as -EIO). + * + * By default, new contexts allow persistence. + */ +#define I915_CONTEXT_PARAM_PERSISTENCE 0xb /* Must be kept compact -- no holes and well documented */ __u64 value; -- cgit v1.2.3 From fae7d7d5f374eadbb0b5dd31b39162e7176e9c3d Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 30 Oct 2019 16:29:54 -0400 Subject: Revert "dma-buf: Add dma-buf heaps framework" This reverts commit a69b0e855d3fd278ff6f09a23e1edf929538e304. This patchset doesn't meet the UAPI requirements set out in [1] for the DRM subsystem. Once the userspace component is reviewed and ready for merge we can try again. [1]- https://01.org/linuxgraphics/gfx-docs/drm/gpu/drm-uapi.html#open-source-userspace-requirements Fixes: a69b0e855d3f ("dma-buf: Add dma-buf heaps framework") Cc: Laura Abbott Cc: Benjamin Gaignard Cc: Sumit Semwal Cc: Liam Mark Cc: Pratik Patel Cc: Brian Starkey Cc: Vincent Donnefort Cc: Sudipto Paul Cc: Andrew F. Davis Cc: Christoph Hellwig Cc: Chenbo Feng Cc: Alistair Strachan Cc: Hridya Valsaraju Cc: Hillf Danton Cc: dri-devel@lists.freedesktop.org Cc: Brian Starkey Cc: John Stultz Cc: Mauro Carvalho Chehab Cc: "David S. Miller" Cc: Greg Kroah-Hartman Cc: Rob Herring Cc: Jonathan Cameron Cc: "Paul E. McKenney" Cc: Sean Paul Cc: "Andrew F. Davis" Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Acked-by: David Airlie Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20191030203003.101156-6-sean@poorly.run --- MAINTAINERS | 18 --- drivers/dma-buf/Kconfig | 9 -- drivers/dma-buf/Makefile | 1 - drivers/dma-buf/dma-heap.c | 269 ------------------------------------------ include/linux/dma-heap.h | 59 --------- include/uapi/linux/dma-heap.h | 55 --------- 6 files changed, 411 deletions(-) delete mode 100644 drivers/dma-buf/dma-heap.c delete mode 100644 include/linux/dma-heap.h delete mode 100644 include/uapi/linux/dma-heap.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index 5f39067caa7c..b63c291ad029 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4942,24 +4942,6 @@ F: include/linux/*fence.h F: Documentation/driver-api/dma-buf.rst T: git git://anongit.freedesktop.org/drm/drm-misc -DMA-BUF HEAPS FRAMEWORK -M: Sumit Semwal -R: Andrew F. Davis -R: Benjamin Gaignard -R: Liam Mark -R: Laura Abbott -R: Brian Starkey -R: John Stultz -S: Maintained -L: linux-media@vger.kernel.org -L: dri-devel@lists.freedesktop.org -L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers) -F: include/uapi/linux/dma-heap.h -F: include/linux/dma-heap.h -F: drivers/dma-buf/dma-heap.c -F: drivers/dma-buf/heaps/* -T: git git://anongit.freedesktop.org/drm/drm-misc - DMA GENERIC OFFLOAD ENGINE SUBSYSTEM M: Vinod Koul L: dmaengine@vger.kernel.org diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index bffa58fc3e6e..a23b6752d11a 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -44,13 +44,4 @@ config DMABUF_SELFTESTS default n depends on DMA_SHARED_BUFFER -menuconfig DMABUF_HEAPS - bool "DMA-BUF Userland Memory Heaps" - select DMA_SHARED_BUFFER - help - Choose this option to enable the DMA-BUF userland memory heaps. - This options creates per heap chardevs in /dev/dma_heap/ which - allows userspace to allocate dma-bufs that can be shared - between drivers. - endmenu diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index caee5eb3d351..03479da06422 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ dma-resv.o seqno-fence.o -obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o obj-$(CONFIG_SYNC_FILE) += sync_file.o obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o obj-$(CONFIG_UDMABUF) += udmabuf.o diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c deleted file mode 100644 index 9a41b73e54b4..000000000000 --- a/drivers/dma-buf/dma-heap.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Framework for userspace DMA-BUF allocations - * - * Copyright (C) 2011 Google, Inc. - * Copyright (C) 2019 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DEVNAME "dma_heap" - -#define NUM_HEAP_MINORS 128 - -/** - * struct dma_heap - represents a dmabuf heap in the system - * @name: used for debugging/device-node name - * @ops: ops struct for this heap - * @minor minor number of this heap device - * @heap_devt heap device node - * @heap_cdev heap char device - * - * Represents a heap of memory from which buffers can be made. - */ -struct dma_heap { - const char *name; - const struct dma_heap_ops *ops; - void *priv; - unsigned int minor; - dev_t heap_devt; - struct list_head list; - struct cdev heap_cdev; -}; - -static LIST_HEAD(heap_list); -static DEFINE_MUTEX(heap_list_lock); -static dev_t dma_heap_devt; -static struct class *dma_heap_class; -static DEFINE_XARRAY_ALLOC(dma_heap_minors); - -static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, - unsigned int fd_flags, - unsigned int heap_flags) -{ - /* - * Allocations from all heaps have to begin - * and end on page boundaries. - */ - len = PAGE_ALIGN(len); - if (!len) - return -EINVAL; - - return heap->ops->allocate(heap, len, fd_flags, heap_flags); -} - -static int dma_heap_open(struct inode *inode, struct file *file) -{ - struct dma_heap *heap; - - heap = xa_load(&dma_heap_minors, iminor(inode)); - if (!heap) { - pr_err("dma_heap: minor %d unknown.\n", iminor(inode)); - return -ENODEV; - } - - /* instance data as context */ - file->private_data = heap; - nonseekable_open(inode, file); - - return 0; -} - -static long dma_heap_ioctl_allocate(struct file *file, unsigned long arg) -{ - struct dma_heap_allocation_data heap_allocation; - struct dma_heap *heap = file->private_data; - int fd; - - if (copy_from_user(&heap_allocation, (void __user *)arg, - sizeof(heap_allocation))) - return -EFAULT; - - if (heap_allocation.fd || - heap_allocation.reserved0 || - heap_allocation.reserved1) { - pr_warn_once("dma_heap: ioctl data not valid\n"); - return -EINVAL; - } - - if (heap_allocation.fd_flags & ~DMA_HEAP_VALID_FD_FLAGS) { - pr_warn_once("dma_heap: fd_flags has invalid or unsupported flags set\n"); - return -EINVAL; - } - - if (heap_allocation.heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS) { - pr_warn_once("dma_heap: heap flags has invalid or unsupported flags set\n"); - return -EINVAL; - } - - fd = dma_heap_buffer_alloc(heap, heap_allocation.len, - heap_allocation.fd_flags, - heap_allocation.heap_flags); - if (fd < 0) - return fd; - - heap_allocation.fd = fd; - - if (copy_to_user((void __user *)arg, &heap_allocation, - sizeof(heap_allocation))) { - ksys_close(fd); - return -EFAULT; - } - - return 0; -} - -static long dma_heap_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int ret = 0; - - switch (cmd) { - case DMA_HEAP_IOC_ALLOC: - ret = dma_heap_ioctl_allocate(file, arg); - break; - default: - return -ENOTTY; - } - - return ret; -} - -static const struct file_operations dma_heap_fops = { - .owner = THIS_MODULE, - .open = dma_heap_open, - .unlocked_ioctl = dma_heap_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = dma_heap_ioctl, -#endif -}; - -/** - * dma_heap_get_drvdata() - get per-subdriver data for the heap - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The per-subdriver data for the heap. - */ -void *dma_heap_get_drvdata(struct dma_heap *heap) -{ - return heap->priv; -} - -struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) -{ - struct dma_heap *heap, *h, *err_ret; - struct device *dev_ret; - int ret; - - if (!exp_info->name || !strcmp(exp_info->name, "")) { - pr_err("dma_heap: Cannot add heap without a name\n"); - return ERR_PTR(-EINVAL); - } - - if (!exp_info->ops || !exp_info->ops->allocate) { - pr_err("dma_heap: Cannot add heap with invalid ops struct\n"); - return ERR_PTR(-EINVAL); - } - - /* check the name is unique */ - mutex_lock(&heap_list_lock); - list_for_each_entry(h, &heap_list, list) { - if (!strcmp(h->name, exp_info->name)) { - mutex_unlock(&heap_list_lock); - pr_err("dma_heap: Already registered heap named %s\n", - exp_info->name); - return ERR_PTR(-EINVAL); - } - } - mutex_unlock(&heap_list_lock); - - heap = kzalloc(sizeof(*heap), GFP_KERNEL); - if (!heap) - return ERR_PTR(-ENOMEM); - - heap->name = exp_info->name; - heap->ops = exp_info->ops; - heap->priv = exp_info->priv; - - /* Find unused minor number */ - ret = xa_alloc(&dma_heap_minors, &heap->minor, heap, - XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL); - if (ret < 0) { - pr_err("dma_heap: Unable to get minor number for heap\n"); - err_ret = ERR_PTR(ret); - goto err0; - } - - /* Create device */ - heap->heap_devt = MKDEV(MAJOR(dma_heap_devt), heap->minor); - - cdev_init(&heap->heap_cdev, &dma_heap_fops); - ret = cdev_add(&heap->heap_cdev, heap->heap_devt, 1); - if (ret < 0) { - pr_err("dma_heap: Unable to add char device\n"); - err_ret = ERR_PTR(ret); - goto err1; - } - - dev_ret = device_create(dma_heap_class, - NULL, - heap->heap_devt, - NULL, - heap->name); - if (IS_ERR(dev_ret)) { - pr_err("dma_heap: Unable to create device\n"); - err_ret = ERR_CAST(dev_ret); - goto err2; - } - /* Add heap to the list */ - mutex_lock(&heap_list_lock); - list_add(&heap->list, &heap_list); - mutex_unlock(&heap_list_lock); - - return heap; - -err2: - cdev_del(&heap->heap_cdev); -err1: - xa_erase(&dma_heap_minors, heap->minor); -err0: - kfree(heap); - return err_ret; -} - -static char *dma_heap_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev)); -} - -static int dma_heap_init(void) -{ - int ret; - - ret = alloc_chrdev_region(&dma_heap_devt, 0, NUM_HEAP_MINORS, DEVNAME); - if (ret) - return ret; - - dma_heap_class = class_create(THIS_MODULE, DEVNAME); - if (IS_ERR(dma_heap_class)) { - unregister_chrdev_region(dma_heap_devt, NUM_HEAP_MINORS); - return PTR_ERR(dma_heap_class); - } - dma_heap_class->devnode = dma_heap_devnode; - - return 0; -} -subsys_initcall(dma_heap_init); diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h deleted file mode 100644 index 454e354d1ffb..000000000000 --- a/include/linux/dma-heap.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * DMABUF Heaps Allocation Infrastructure - * - * Copyright (C) 2011 Google, Inc. - * Copyright (C) 2019 Linaro Ltd. - */ - -#ifndef _DMA_HEAPS_H -#define _DMA_HEAPS_H - -#include -#include - -struct dma_heap; - -/** - * struct dma_heap_ops - ops to operate on a given heap - * @allocate: allocate dmabuf and return fd - * - * allocate returns dmabuf fd on success, -errno on error. - */ -struct dma_heap_ops { - int (*allocate)(struct dma_heap *heap, - unsigned long len, - unsigned long fd_flags, - unsigned long heap_flags); -}; - -/** - * struct dma_heap_export_info - information needed to export a new dmabuf heap - * @name: used for debugging/device-node name - * @ops: ops struct for this heap - * @priv: heap exporter private data - * - * Information needed to export a new dmabuf heap. - */ -struct dma_heap_export_info { - const char *name; - const struct dma_heap_ops *ops; - void *priv; -}; - -/** - * dma_heap_get_drvdata() - get per-heap driver data - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The per-heap data for the heap. - */ -void *dma_heap_get_drvdata(struct dma_heap *heap); - -/** - * dma_heap_add - adds a heap to dmabuf heaps - * @exp_info: information needed to register this heap - */ -struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); - -#endif /* _DMA_HEAPS_H */ diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h deleted file mode 100644 index 6ce5cc68d238..000000000000 --- a/include/uapi/linux/dma-heap.h +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * DMABUF Heaps Userspace API - * - * Copyright (C) 2011 Google, Inc. - * Copyright (C) 2019 Linaro Ltd. - */ -#ifndef _UAPI_LINUX_DMABUF_POOL_H -#define _UAPI_LINUX_DMABUF_POOL_H - -#include -#include - -/** - * DOC: DMABUF Heaps Userspace API - */ - -/* Valid FD_FLAGS are O_CLOEXEC, O_RDONLY, O_WRONLY, O_RDWR */ -#define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE) - -/* Currently no heap flags */ -#define DMA_HEAP_VALID_HEAP_FLAGS (0) - -/** - * struct dma_heap_allocation_data - metadata passed from userspace for - * allocations - * @len: size of the allocation - * @fd: will be populated with a fd which provdes the - * handle to the allocated dma-buf - * @fd_flags: file descriptor flags used when allocating - * @heap_flags: flags passed to heap - * - * Provided by userspace as an argument to the ioctl - */ -struct dma_heap_allocation_data { - __u64 len; - __u32 fd; - __u32 fd_flags; - __u64 heap_flags; - __u32 reserved0; - __u32 reserved1; -}; - -#define DMA_HEAP_IOC_MAGIC 'H' - -/** - * DOC: DMA_HEAP_IOC_ALLOC - allocate memory from pool - * - * Takes an dma_heap_allocation_data struct and returns it with the fd field - * populated with the dmabuf handle of the allocation. - */ -#define DMA_HEAP_IOC_ALLOC _IOWR(DMA_HEAP_IOC_MAGIC, 0, \ - struct dma_heap_allocation_data) - -#endif /* _UAPI_LINUX_DMABUF_POOL_H */ -- cgit v1.2.3