summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-28 17:49:53 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-28 17:49:53 -0700
commit53b3b6bbfde6aae8d1ededc86ad4e0e1e00eb5f8 (patch)
treeb29473f21270aefd113b298c9402be8b4b3c91b4 /drivers/gpu/drm/amd/amdkfd/kfd_crat.c
parent746bb4ed6d626f3f9e431a7f9b20504538e62ded (diff)
parentf2bfc71aee75feff33ca659322b72ffeed5a243d (diff)
downloadblackbird-obmc-linux-53b3b6bbfde6aae8d1ededc86ad4e0e1e00eb5f8.tar.gz
blackbird-obmc-linux-53b3b6bbfde6aae8d1ededc86ad4e0e1e00eb5f8.zip
Merge tag 'drm-next-2018-10-24' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "This is going to rebuild more than drm as it adds a new helper to list.h for doing bulk updates. Seemed like a reasonable addition to me. Otherwise the usual merge window stuff lots of i915 and amdgpu, not so much nouveau, and piles of everything else. Core: - Adds a new list.h helper for doing bulk list updates for TTM. - Don't leak fb address in smem_start to userspace (comes with EXPORT workaround for people using mali out of tree hacks) - udmabuf device to turn memfd regions into dma-buf - Per-plane blend mode property - ref/unref replacements with get/put - fbdev conflicting framebuffers code cleaned up - host-endian format variants - panel orientation quirk for Acer One 10 bridge: - TI SN65DSI86 chip support vkms: - GEM support. - Cursor support amdgpu: - Merge amdkfd and amdgpu into one module - CEC over DP AUX support - Picasso APU support + VCN dynamic powergating - Raven2 APU support - Vega20 enablement + kfd support - ACP powergating improvements - ABGR/XBGR display support - VCN jpeg support - xGMI support - DC i2c/aux cleanup - Ycbcr 4:2:0 support - GPUVM improvements - Powerplay and powerplay endian fixes - Display underflow fixes vmwgfx: - Move vmwgfx specific TTM code to vmwgfx - Split out vmwgfx buffer/resource validation code - Atomic operation rework bochs: - use more helpers - format/byteorder improvements qxl: - use more helpers i915: - GGTT coherency getparam - Turn off resource streamer API - More Icelake enablement + DMC firmware - Full PPGTT for Ivybridge, Haswell and Valleyview - DDB distribution based on resolution - Limited range DP display support nouveau: - CEC over DP AUX support - Initial HDMI 2.0 support virtio-gpu: - vmap support for PRIME objects tegra: - Initial Tegra194 support - DMA/IOMMU integration fixes msm: - a6xx perf improvements + clock prefix - GPU preemption optimisations - a6xx devfreq support - cursor support rockchip: - PX30 support - rgb output interface support mediatek: - HDMI output support on mt2701 and mt7623 rcar-du: - Interlaced modes on Gen3 - LVDS on R8A77980 - D3 and E3 SoC support hisilicon: - misc fixes mxsfb: - runtime pm support sun4i: - R40 TCON support - Allwinner A64 support - R40 HDMI support omapdrm: - Driver rework changing display pipeline ordering to use common code - DMM memory barrier and irq fixes - Errata workarounds exynos: - out-bridge support for LVDS bridge driver - Samsung 16x16 tiled format support - Plane alpha and pixel blend mode support tilcdc: - suspend/resume update mali-dp: - misc updates" * tag 'drm-next-2018-10-24' of git://anongit.freedesktop.org/drm/drm: (1382 commits) firmware/dmc/icl: Add missing MODULE_FIRMWARE() for Icelake. drm/i915/icl: Fix signal_levels drm/i915/icl: Fix DDI/TC port clk_off bits drm/i915/icl: create function to identify combophy port drm/i915/gen9+: Fix initial readout for Y tiled framebuffers drm/i915: Large page offsets for pread/pwrite drm/i915/selftests: Disable shrinker across mmap-exhaustion drm/i915/dp: Link train Fallback on eDP only if fallback link BW can fit panel's native mode drm/i915: Fix intel_dp_mst_best_encoder() drm/i915: Skip vcpi allocation for MSTB ports that are gone drm/i915: Don't unset intel_connector->mst_port drm/i915: Only reset seqno if actually idle drm/i915: Use the correct crtc when sanitizing plane mapping drm/i915: Restore vblank interrupts earlier drm/i915: Check fb stride against plane max stride drm/amdgpu/vcn:Fix uninitialized symbol error drm: panel-orientation-quirks: Add quirk for Acer One 10 (S1003) drm/amd/amdgpu: Fix debugfs error handling drm/amdgpu: Update gc_9_0 golden settings. drm/amd/powerplay: update PPtable with DC BTC and Tvr SocLimit fields ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c88
1 files changed, 75 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index ee4996029a86..56412b0e7e1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -346,15 +346,15 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
struct list_head *device_list)
{
struct kfd_iolink_properties *props = NULL, *props2;
- struct kfd_topology_device *dev, *cpu_dev;
+ struct kfd_topology_device *dev, *to_dev;
uint32_t id_from;
uint32_t id_to;
id_from = iolink->proximity_domain_from;
id_to = iolink->proximity_domain_to;
- pr_debug("Found IO link entry in CRAT table with id_from=%d\n",
- id_from);
+ pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
+ id_from, id_to);
list_for_each_entry(dev, device_list, list) {
if (id_from == dev->proximity_domain) {
props = kfd_alloc_struct(props);
@@ -369,6 +369,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
+ else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
+ props->weight = 15;
else
props->weight = node_distance(id_from, id_to);
@@ -389,20 +391,23 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
/* CPU topology is created before GPUs are detected, so CPU->GPU
* links are not built at that time. If a PCIe type is discovered, it
* means a GPU is detected and we are adding GPU->CPU to the topology.
- * At this time, also add the corresponded CPU->GPU link.
+ * At this time, also add the corresponded CPU->GPU link if GPU
+ * is large bar.
+ * For xGMI, we only added the link with one direction in the crat
+ * table, add corresponded reversed direction link now.
*/
- if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
- cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
- if (!cpu_dev)
+ if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
+ to_dev = kfd_topology_device_by_proximity_domain(id_to);
+ if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
- cpu_dev->io_link_count++;
- cpu_dev->node_props.io_links_count++;
- list_add_tail(&props2->list, &cpu_dev->io_link_props);
+ to_dev->io_link_count++;
+ to_dev->node_props.io_links_count++;
+ list_add_tail(&props2->list, &to_dev->io_link_props);
}
return 0;
@@ -642,6 +647,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
pcache_info = vega10_cache_info;
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
break;
@@ -1037,7 +1043,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
*
* Return 0 if successful else return -ve value
*/
-static int kfd_fill_gpu_direct_io_link(int *avail_size,
+static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
struct kfd_dev *kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
@@ -1052,6 +1058,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+ if (kfd_dev_is_large_bar(kdev))
+ sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
@@ -1069,6 +1077,29 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
return 0;
}
+static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
+ struct kfd_dev *kdev,
+ struct crat_subtype_iolink *sub_type_hdr,
+ uint32_t proximity_domain_from,
+ uint32_t proximity_domain_to)
+{
+ *avail_size -= sizeof(struct crat_subtype_iolink);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+ sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
+ CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
+
+ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+ sub_type_hdr->proximity_domain_from = proximity_domain_from;
+ sub_type_hdr->proximity_domain_to = proximity_domain_to;
+ return 0;
+}
+
/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
*
* @pcrat_image: Fill in VCRAT for GPU
@@ -1081,14 +1112,16 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
struct crat_subtype_generic *sub_type_hdr;
+ struct kfd_local_mem_info local_mem_info;
+ struct kfd_topology_device *peer_dev;
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
+ uint32_t nid = 0;
int ret = 0;
- struct kfd_local_mem_info local_mem_info;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
return -EINVAL;
@@ -1212,7 +1245,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
*/
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
cache_mem_filled);
- ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
+ ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
if (ret < 0)
@@ -1221,6 +1254,35 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
+
+ /* Fill in Subtype: IO_LINKS
+ * Direct links from GPU to other GPUs through xGMI.
+ * We will loop GPUs that already be processed (with lower value
+ * of proximity_domain), add the link for the GPUs with same
+ * hive id (from this GPU to other GPU) . The reversed iolink
+ * (from other GPU to this GPU) will be added
+ * in kfd_parse_subtype_iolink.
+ */
+ if (kdev->hive_id) {
+ for (nid = 0; nid < proximity_domain; ++nid) {
+ peer_dev = kfd_topology_device_by_proximity_domain(nid);
+ if (!peer_dev->gpu)
+ continue;
+ if (peer_dev->gpu->hive_id != kdev->hive_id)
+ continue;
+ sub_type_hdr = (typeof(sub_type_hdr))(
+ (char *)sub_type_hdr +
+ sizeof(struct crat_subtype_iolink));
+ ret = kfd_fill_gpu_xgmi_link_to_gpu(
+ &avail_size, kdev,
+ (struct crat_subtype_iolink *)sub_type_hdr,
+ proximity_domain, nid);
+ if (ret < 0)
+ return ret;
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+ }
+ }
*size = crat_table->length;
pr_info("Virtual CRAT table created for GPU\n");
OpenPOWER on IntegriCloud