From 810ddc3ab52dd84f4cd28ee8673678aece457a59 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 23 Aug 2016 13:25:49 -0400
Subject: drm/amdgpu: rename suspend_kms and resume_kms

The old names were dragged over from radeon.  The new ones
better match the naming conventions used in the driver.

No functional change.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a631c954c4e2..1b787d974515 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -397,28 +397,28 @@ static int amdgpu_pmops_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_suspend_kms(drm_dev, true, true);
+	return amdgpu_device_suspend(drm_dev, true, true);
 }
 
 static int amdgpu_pmops_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_resume_kms(drm_dev, true, true);
+	return amdgpu_device_resume(drm_dev, true, true);
 }
 
 static int amdgpu_pmops_freeze(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_suspend_kms(drm_dev, false, true);
+	return amdgpu_device_suspend(drm_dev, false, true);
 }
 
 static int amdgpu_pmops_thaw(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	return amdgpu_resume_kms(drm_dev, false, true);
+	return amdgpu_device_resume(drm_dev, false, true);
 }
 
 static int amdgpu_pmops_runtime_suspend(struct device *dev)
@@ -436,7 +436,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 	drm_kms_helper_poll_disable(drm_dev);
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_OFF);
 
-	ret = amdgpu_suspend_kms(drm_dev, false, false);
+	ret = amdgpu_device_suspend(drm_dev, false, false);
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
@@ -469,7 +469,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 		return ret;
 	pci_set_master(pdev);
 
-	ret = amdgpu_resume_kms(drm_dev, false, false);
+	ret = amdgpu_device_resume(drm_dev, false, false);
 	drm_kms_helper_poll_enable(drm_dev);
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
-- 
cgit v1.2.3


From 95844d20ae024b5d553c9923a0d3145c3956bf69 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 17 Aug 2016 23:49:27 +0200
Subject: drm/amdgpu: throttle buffer migrations at CS using a fixed MBps limit
 (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old mechanism used a per-submission limit that didn't take previous
submissions within the same time frame into account. It also filled VRAM
slowly when VRAM usage dropped due to a big eviction or buffer deallocation.

This new method establishes a configurable MBps limit that is obeyed when
VRAM usage is very high. When VRAM usage is not very high, it gives
the driver the freedom to fill it quickly. The result is more consistent
performance.

It can't keep the BO move rate low if lots of evictions are happening due
to VRAM fragmentation, or if a big buffer is being migrated.

The amdgpu.moverate parameter can be used to set a non-default limit.
Measurements can be done to find out which amdgpu.moverate setting gives
the best results.

Mainly APUs and cards with small VRAM will benefit from this. For F1 2015,
anything with 2 GB VRAM or less will benefit.

Some benchmark results - F1 2015 (Tonga 2GB):

Limit      MinFPS AvgFPS
Old code:  14     32.6
128 MB/s:  28     41
64 MB/s:   15.5   43
32 MB/s:   28.7   43.4
8 MB/s:    27.8   44.4
8 MB/s:    21.9   42.8 (different run)

Random drops in Min FPS can still occur (due to fragmented VRAM?), but
the average FPS is much better. 8 MB/s is probably a good limit for this
game & the current VRAM management. The random FPS drops are still to be
tackled.

v2: use a spinlock

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   9 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 152 ++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |   4 +
 4 files changed, 127 insertions(+), 48 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4cfcf9c37800..938ef1cb68cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -64,6 +64,7 @@
 extern int amdgpu_modeset;
 extern int amdgpu_vram_limit;
 extern int amdgpu_gart_size;
+extern int amdgpu_moverate;
 extern int amdgpu_benchmarking;
 extern int amdgpu_testing;
 extern int amdgpu_audio;
@@ -2034,6 +2035,14 @@ struct amdgpu_device {
 	atomic64_t			num_evictions;
 	atomic_t			gpu_reset_counter;
 
+	/* data for buffer migration throttling */
+	struct {
+		spinlock_t		lock;
+		s64			last_update_us;
+		s64			accum_us; /* accumulated microseconds */
+		u32			log2_max_MBps;
+	} mm_stats;
+
 	/* display */
 	bool				enable_virtual_display;
 	struct amdgpu_mode_info		mode_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d80e5d3a4add..82927570333a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -235,56 +235,115 @@ free_chunk:
 	return ret;
 }
 
-/* Returns how many bytes TTM can move per IB.
+/* Convert microseconds to bytes. */
+static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
+{
+	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	/* Since accum_us is incremented by a million per second, just
+	 * multiply it by the number of MB/s to get the number of bytes.
+	 */
+	return us << adev->mm_stats.log2_max_MBps;
+}
+
+static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
+{
+	if (!adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	return bytes >> adev->mm_stats.log2_max_MBps;
+}
+
+/* Returns how many bytes TTM can move right now. If no bytes can be moved,
+ * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
+ * which means it can go over the threshold once. If that happens, the driver
+ * will be in debt and no other buffer migrations can be done until that debt
+ * is repaid.
+ *
+ * This approach allows moving a buffer of any size (it's important to allow
+ * that).
+ *
+ * The currency is simply time in microseconds and it increases as the clock
+ * ticks. The accumulated microseconds (us) are converted to bytes and
+ * returned.
  */
 static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 {
-	u64 real_vram_size = adev->mc.real_vram_size;
-	u64 vram_usage = atomic64_read(&adev->vram_usage);
+	s64 time_us, increment_us;
+	u64 max_bytes;
+	u64 free_vram, total_vram, used_vram;
 
-	/* This function is based on the current VRAM usage.
+	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
+	 * throttling.
 	 *
-	 * - If all of VRAM is free, allow relocating the number of bytes that
-	 *   is equal to 1/4 of the size of VRAM for this IB.
+	 * It means that in order to get full max MBps, at least 5 IBs per
+	 * second must be submitted and not more than 200ms apart from each
+	 * other.
+	 */
+	const s64 us_upper_bound = 200000;
 
-	 * - If more than one half of VRAM is occupied, only allow relocating
-	 *   1 MB of data for this IB.
-	 *
-	 * - From 0 to one half of used VRAM, the threshold decreases
-	 *   linearly.
-	 *         __________________
-	 * 1/4 of -|\               |
-	 * VRAM    | \              |
-	 *         |  \             |
-	 *         |   \            |
-	 *         |    \           |
-	 *         |     \          |
-	 *         |      \         |
-	 *         |       \________|1 MB
-	 *         |----------------|
-	 *    VRAM 0 %             100 %
-	 *         used            used
-	 *
-	 * Note: It's a threshold, not a limit. The threshold must be crossed
-	 * for buffer relocations to stop, so any buffer of an arbitrary size
-	 * can be moved as long as the threshold isn't crossed before
-	 * the relocation takes place. We don't want to disable buffer
-	 * relocations completely.
+	if (!adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
+	used_vram = atomic64_read(&adev->vram_usage);
+	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
+
+	spin_lock(&adev->mm_stats.lock);
+
+	/* Increase the amount of accumulated us. */
+	time_us = ktime_to_us(ktime_get());
+	increment_us = time_us - adev->mm_stats.last_update_us;
+	adev->mm_stats.last_update_us = time_us;
+	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
+                                      us_upper_bound);
+
+	/* This prevents the short period of low performance when the VRAM
+	 * usage is low and the driver is in debt or doesn't have enough
+	 * accumulated us to fill VRAM quickly.
 	 *
-	 * The idea is that buffers should be placed in VRAM at creation time
-	 * and TTM should only do a minimum number of relocations during
-	 * command submission. In practice, you need to submit at least
-	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
+	 * The situation can occur in these cases:
+	 * - a lot of VRAM is freed by userspace
+	 * - the presence of a big buffer causes a lot of evictions
+	 *   (solution: split buffers into smaller ones)
 	 *
-	 * Also, things can get pretty crazy under memory pressure and actual
-	 * VRAM usage can change a lot, so playing safe even at 50% does
-	 * consistently increase performance.
+	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
+	 * accum_us to a positive number.
 	 */
+	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
+		s64 min_us;
+
+		/* Be more aggressive on dGPUs. Try to fill a portion of free
+		 * VRAM now.
+		 */
+		if (!(adev->flags & AMD_IS_APU))
+			min_us = bytes_to_us(adev, free_vram / 4);
+		else
+			min_us = 0; /* Reset accum_us on APUs. */
+
+		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
+	}
 
-	u64 half_vram = real_vram_size >> 1;
-	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
-	u64 bytes_moved_threshold = half_free_vram >> 1;
-	return max(bytes_moved_threshold, 1024*1024ull);
+	/* This returns 0 if the driver is in debt to disallow (optional)
+	 * buffer moves.
+	 */
+	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	spin_unlock(&adev->mm_stats.lock);
+	return max_bytes;
+}
+
+/* Report how many bytes have really been moved for the last command
+ * submission. This can result in a debt that can stop buffer migrations
+ * temporarily.
+ */
+static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
+					 u64 num_bytes)
+{
+	spin_lock(&adev->mm_stats.lock);
+	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	spin_unlock(&adev->mm_stats.lock);
 }
 
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
@@ -297,15 +356,10 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	if (bo->pin_count)
 		return 0;
 
-	/* Avoid moving this one if we have moved too many buffers
-	 * for this IB already.
-	 *
-	 * Note that this allows moving at least one buffer of
-	 * any size, because it doesn't take the current "bo"
-	 * into account. We don't want to disallow buffer moves
-	 * completely.
+	/* Don't move this buffer if we have depleted our allowance
+	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved <= p->bytes_moved_threshold)
+	if (p->bytes_moved < p->bytes_moved_threshold)
 		domain = bo->prefered_domains;
 	else
 		domain = bo->allowed_domains;
@@ -494,6 +548,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
+
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1ef4034b3be5..847583d8a3b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1490,6 +1490,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 {
 	int r, i;
 	bool runtime = false;
+	u32 max_MBps;
 
 	adev->shutdown = false;
 	adev->dev = &pdev->dev;
@@ -1549,6 +1550,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->didt_idx_lock);
 	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
+	spin_lock_init(&adev->mm_stats.lock);
 
 	INIT_LIST_HEAD(&adev->shadow_list);
 	mutex_init(&adev->shadow_list_lock);
@@ -1660,6 +1662,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	adev->accel_working = true;
 
+	/* Initialize the buffer migration limit. */
+	if (amdgpu_moverate >= 0)
+		max_MBps = amdgpu_moverate;
+	else
+		max_MBps = 8; /* Allow 8 MB/s. */
+	/* Get a log2 for easy divisions. */
+	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
+
 	amdgpu_fbdev_init(adev);
 
 	r = amdgpu_ib_pool_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1b787d974515..6fed75454800 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -62,6 +62,7 @@
 
 int amdgpu_vram_limit = 0;
 int amdgpu_gart_size = -1; /* auto */
+int amdgpu_moverate = -1; /* auto */
 int amdgpu_benchmarking = 0;
 int amdgpu_testing = 0;
 int amdgpu_audio = -1;
@@ -100,6 +101,9 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
 MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)");
 module_param_named(gartsize, amdgpu_gart_size, int, 0600);
 
+MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");
+module_param_named(moverate, amdgpu_moverate, int, 0600);
+
 MODULE_PARM_DESC(benchmark, "Run benchmark");
 module_param_named(benchmark, amdgpu_benchmarking, int, 0444);
 
-- 
cgit v1.2.3


From 78fbb6859b739f7f67c820ab8c9e7a25add977b5 Mon Sep 17 00:00:00 2001
From: Ken Wang <Qingqing.Wang@amd.com>
Date: Thu, 21 Jan 2016 17:33:00 +0800
Subject: drm/amdgpu: add si pciids v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 74 +++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6fed75454800..28c3dcc170cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -200,6 +200,80 @@ MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_d
 module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
 
 static const struct pci_device_id pciidlist[] = {
+#ifdef  CONFIG_DRM_AMDGPU_SI
+	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x678A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6790, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6791, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6792, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6798, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6799, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x679A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x679B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x679E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x679F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+	{0x1002, 0x6800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+	{0x1002, 0x6801, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+	{0x1002, 0x6802, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+	{0x1002, 0x6806, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6810, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6811, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6816, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6817, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6818, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6819, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+	{0x1002, 0x6600, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6601, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6602, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6603, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6604, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6605, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6606, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6607, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6608, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+	{0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+	{0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+	{0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+	{0x1002, 0x6617, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6620, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6623, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+	{0x1002, 0x6631, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+	{0x1002, 0x6820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6821, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6822, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6823, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6824, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6826, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x682A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x682C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+	{0x1002, 0x6835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x683B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x683D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x683F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+	{0x1002, 0x6660, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+	{0x1002, 0x6663, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+	{0x1002, 0x6664, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+	{0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+	{0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+	{0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	/* Kaveri */
 	{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
-- 
cgit v1.2.3


From 85e154c22e3c29d4db52ccc1c1cc58a2cadc103b Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Sat, 27 Aug 2016 14:53:08 -0400
Subject: drm/amdgpu: set runtime pm state to active on resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sbios always powers up the dGPU on resume.

Acked-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 28c3dcc170cc..94d013526e8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -482,6 +482,14 @@ static int amdgpu_pmops_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+
+	/* GPU comes up enabled by the bios on resume */
+	if (amdgpu_device_is_px(drm_dev)) {
+		pm_runtime_disable(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+	}
+
 	return amdgpu_device_resume(drm_dev, true, true);
 }
 
-- 
cgit v1.2.3


From 61e113067b636fe73d5d0ac877bcfcebe7cfd034 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 22 Aug 2016 13:50:22 -0400
Subject: drm/amdgpu: wire up a pci shutdown callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Normally on shutdown or reboot we don't care about necessarily
making sure the hw is in a good state because the system is about
to be powered down or reset.  However, after a shutdown or reboot
in a VM, it's best to tear down the hw properly otherwise there
can be problems with the next VM use.

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 94d013526e8e..ea54e3044787 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -471,6 +471,19 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 	drm_put_dev(dev);
 }
 
+static void
+amdgpu_pci_shutdown(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct amdgpu_device *adev = dev->dev_private;
+
+	/* if we are running in a VM, make sure the device is
+	 * torn down properly on reboot/shutdown
+	 */
+	if (adev->virtualization.is_virtual)
+		amdgpu_pci_remove(pdev);
+}
+
 static int amdgpu_pmops_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -692,6 +705,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
 	.id_table = pciidlist,
 	.probe = amdgpu_pci_probe,
 	.remove = amdgpu_pci_remove,
+	.shutdown = amdgpu_pci_shutdown,
 	.driver.pm = &amdgpu_pm_ops,
 };
 
-- 
cgit v1.2.3


From 74b0b157845748e5817cae56c891b05d98da5f47 Mon Sep 17 00:00:00 2001
From: jimqu <Jim.Qu@amd.com>
Date: Wed, 7 Sep 2016 17:09:12 +0800
Subject: drm/amd/amdgpu: S4 issue for amdgpu (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reset the ASIC if the adapter is not powered down when doing freeze(),
thaw() and restore(), in order to get the adapter into a valid state.

v2: squash in warning fix from Rex

Signed-off-by: JimQu <Jim.Qu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Tested By: Shawn Starr <shawn.starr@rogers.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 ++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 23 +++++++++++++++++++++--
 2 files changed, 35 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1aa0364d7961..d324a079fe9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1895,6 +1895,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 		/* Shut down the device */
 		pci_disable_device(dev->pdev);
 		pci_set_power_state(dev->pdev, PCI_D3hot);
+	} else {
+		r = amdgpu_asic_reset(adev);
+		if (r)
+			DRM_ERROR("amdgpu asic reset failed\n");
 	}
 
 	if (fbcon) {
@@ -1925,22 +1929,26 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	    dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
 		return 0;
 
-	if (fbcon) {
+	if (fbcon)
 		console_lock();
-	}
+
 	if (resume) {
 		pci_set_power_state(dev->pdev, PCI_D0);
 		pci_restore_state(dev->pdev);
-		if (pci_enable_device(dev->pdev)) {
+		r = pci_enable_device(dev->pdev);
+		if (r) {
 			if (fbcon)
 				console_unlock();
-			return -1;
+			return r;
 		}
 	}
 
 	/* post card */
-	if (!amdgpu_card_posted(adev))
-		amdgpu_atom_asic_init(adev->mode_info.atom_context);
+	if (!amdgpu_card_posted(adev) || !resume) {
+		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
+		if (r)
+			DRM_ERROR("amdgpu asic init failed\n");
+	}
 
 	r = amdgpu_resume(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ea54e3044787..3bbc0faf48c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -487,6 +487,7 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
 static int amdgpu_pmops_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	return amdgpu_device_suspend(drm_dev, true, true);
 }
@@ -509,6 +510,7 @@ static int amdgpu_pmops_resume(struct device *dev)
 static int amdgpu_pmops_freeze(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	return amdgpu_device_suspend(drm_dev, false, true);
 }
@@ -516,6 +518,23 @@ static int amdgpu_pmops_freeze(struct device *dev)
 static int amdgpu_pmops_thaw(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	return amdgpu_device_resume(drm_dev, false, true);
+}
+
+static int amdgpu_pmops_poweroff(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	return amdgpu_device_suspend(drm_dev, true, true);
+}
+
+static int amdgpu_pmops_restore(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	return amdgpu_device_resume(drm_dev, false, true);
 }
@@ -622,8 +641,8 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
 	.resume = amdgpu_pmops_resume,
 	.freeze = amdgpu_pmops_freeze,
 	.thaw = amdgpu_pmops_thaw,
-	.poweroff = amdgpu_pmops_freeze,
-	.restore = amdgpu_pmops_resume,
+	.poweroff = amdgpu_pmops_poweroff,
+	.restore = amdgpu_pmops_restore,
 	.runtime_suspend = amdgpu_pmops_runtime_suspend,
 	.runtime_resume = amdgpu_pmops_runtime_resume,
 	.runtime_idle = amdgpu_pmops_runtime_idle,
-- 
cgit v1.2.3


From 753ad49c9fdfc732972b0d03f2889f473ed35e59 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Fri, 26 Aug 2016 13:28:28 +0800
Subject: drm/amdgpu:implement CONTEXT_CONTROL (v5)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v1:
For gfx8, use the CONTEXT_CONTROL packet to dynamically
skip the preamble CEIB and other load_xxx commands in sequence.

v2:
support GFX7 as well.
remove cntxcntl in compute ring funcs because CPC doesn't
support this packet.

v3: fix redundant judgement in cntxcntl.
v4: some cleanups, don't change cs_submit()
v5: keep old MESA supported & bump up KMS version.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Ack-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  8 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 12 +++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 20 ++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 30 ++++++++++++++++++++++++++++++
 6 files changed, 79 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 10ec29c50077..717c3b4e1d54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -320,6 +320,7 @@ struct amdgpu_ring_funcs {
 	void (*begin_use)(struct amdgpu_ring *ring);
 	void (*end_use)(struct amdgpu_ring *ring);
 	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 };
 
 /*
@@ -966,6 +967,7 @@ struct amdgpu_ctx {
 	spinlock_t		ring_lock;
 	struct fence            **fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
+	bool preamble_presented;
 };
 
 struct amdgpu_ctx_mgr {
@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
 	struct amdgpu_bo_list_entry	uf_entry;
 };
 
+#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occurred */
+
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
@@ -1239,6 +1245,7 @@ struct amdgpu_job {
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
 	struct fence		*fence; /* the hw fence */
+	uint32_t		preamble_status;
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
@@ -2276,6 +2283,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 56bde6436a1f..61b7e25535bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -850,6 +850,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
+		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
+			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+			if (!parser->ctx->preamble_presented) {
+				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+				parser->ctx->preamble_presented = true;
+			}
+		}
+
 		if (parser->job->ring && parser->job->ring != ring)
 			return -EINVAL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3bbc0faf48c8..ca3d87aac7fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -55,9 +55,10 @@
  * - 3.3.0 - Add VM support for UVD on supported hardware.
  * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
  * - 3.5.0 - Add support for new UVD_NO_OP register.
+ * - 3.6.0 - kmd now emits CONTEXT_CONTROL in the ring buffer.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	5
+#define KMS_DRIVER_MINOR	6
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 04263f0fd1af..2aa741c2a64c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
+	uint32_t status = 0;
 
 	unsigned i;
 	int r = 0;
@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
 	skip_preamble = ring->current_ctx == fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
+	if (job && ring->funcs->emit_cntxcntl) {
+		if (need_ctx_switch)
+			status |= AMDGPU_HAVE_CTX_SWITCH;
+		status |= job->preamble_status;
+		amdgpu_ring_emit_cntxcntl(ring, status);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
 		/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
+		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+			skip_preamble &&
+			!(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
 			continue;
 
 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a93a803b659e..8c780f6c1276 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, control);
 }
 
+static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+	uint32_t dw2 = 0;
+
+	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
+	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		dw2 |= 0x8001;
+		/* set load_cs_sh_regs */
+		dw2 |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs */
+		dw2 |= 0x10002;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, dw2);
+	amdgpu_ring_write(ring, 0);
+}
+
 /**
  * gfx_v7_0_ring_test_ib - basic ring IB test
  *
@@ -4938,6 +4957,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
 };
 
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 44915056297b..dca8b368728c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6076,6 +6076,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+	uint32_t dw2 = 0;
+
+	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
+	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		dw2 |= 0x8001;
+		/* set load_cs_sh_regs */
+		dw2 |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs for GFX */
+		dw2 |= 0x10002;
+
+		/* set load_ce_ram if preamble presented */
+		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+			dw2 |= 0x10000000;
+	} else {
+		/* still load_ce_ram if this is the first time the preamble is
+		 * presented, even though no context switch happens.
+		 */
+		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+			dw2 |= 0x10000000;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, dw2);
+	amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6258,6 +6287,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
-- 
cgit v1.2.3


From 5141e9d2f7811e1ba714e069c4f12b64de67030f Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 6 Sep 2016 16:34:37 +0800
Subject: drm/amd/powerplay: add module parameter to mask pp features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2e8f469159c5..e69fd332acb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -95,6 +95,7 @@ extern unsigned amdgpu_pg_mask;
 extern char *amdgpu_disable_cu;
 extern int amdgpu_sclk_deep_sleep_en;
 extern char *amdgpu_virtual_display;
+extern unsigned amdgpu_pp_feature_mask;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ca3d87aac7fb..c96ae105b764 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -95,6 +95,7 @@ unsigned amdgpu_cg_mask = 0xffffffff;
 unsigned amdgpu_pg_mask = 0xffffffff;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
+unsigned amdgpu_pp_feature_mask = 0xffffffff;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -177,6 +178,9 @@ module_param_named(powerplay, amdgpu_powerplay, int, 0444);
 
 MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
 module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
+
+MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
+module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
 #endif
 
 MODULE_PARM_DESC(sclkdeepsleep, "SCLK Deep Sleep (1 = enable (default), 0 = disable)");
-- 
cgit v1.2.3