summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/amd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 90
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 14
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 33
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 343
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 90
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 24
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 33
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 22
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 32
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 76
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 118
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 172
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 20
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 128
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 14
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 94
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/atombios_dp.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 64
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cikd.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cz_smc.c | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/fiji_smc.c | 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 83
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 91
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 17
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/iceland_smc.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 66
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 65
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/tonga_smc.c | 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 21
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 49
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vi.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 2
-rw-r--r-- drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h | 41
-rw-r--r-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 497
-rw-r--r-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 89
-rw-r--r-- drivers/gpu/drm/amd/scheduler/sched_fence.c | 19
64 files changed, 1480 insertions, 1216 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2fc58e658986..6647fb26ef25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -82,6 +82,7 @@ extern int amdgpu_vm_block_size;
extern int amdgpu_enable_scheduler;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
+extern int amdgpu_enable_semaphores;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@@ -98,6 +99,9 @@ extern int amdgpu_sched_hw_submission;
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 2
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES 2
+
/* number of hw syncs before falling back on blocking */
#define AMDGPU_NUM_SYNCS 4
@@ -183,6 +187,7 @@ struct amdgpu_vm;
struct amdgpu_ring;
struct amdgpu_semaphore;
struct amdgpu_cs_parser;
+struct amdgpu_job;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
@@ -246,7 +251,7 @@ struct amdgpu_buffer_funcs {
unsigned copy_num_dw;
/* used for buffer migration */
- void (*emit_copy_buffer)(struct amdgpu_ring *ring,
+ void (*emit_copy_buffer)(struct amdgpu_ib *ib,
/* src addr in bytes */
uint64_t src_offset,
/* dst addr in bytes */
@@ -261,7 +266,7 @@ struct amdgpu_buffer_funcs {
unsigned fill_num_dw;
/* used for buffer clearing */
- void (*emit_fill_buffer)(struct amdgpu_ring *ring,
+ void (*emit_fill_buffer)(struct amdgpu_ib *ib,
/* value to write to memory */
uint32_t src_data,
/* dst addr in bytes */
@@ -339,6 +344,8 @@ struct amdgpu_ring_funcs {
int (*test_ring)(struct amdgpu_ring *ring);
int (*test_ib)(struct amdgpu_ring *ring);
bool (*is_lockup)(struct amdgpu_ring *ring);
+ /* insert NOP packets */
+ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
};
/*
@@ -426,7 +433,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
-void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
@@ -440,8 +447,10 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
- struct amdgpu_fence **fences,
- bool intr, long t);
+ struct fence **array,
+ uint32_t count,
+ bool intr,
+ signed long t);
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
void amdgpu_fence_unref(struct amdgpu_fence **fence);
@@ -514,7 +523,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
uint64_t dst_offset,
uint32_t byte_count,
struct reservation_object *resv,
- struct amdgpu_fence **fence);
+ struct fence **fence);
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
struct amdgpu_bo_list_entry {
@@ -650,7 +659,7 @@ struct amdgpu_sa_bo {
struct amdgpu_sa_manager *manager;
unsigned soffset;
unsigned eoffset;
- struct amdgpu_fence *fence;
+ struct fence *fence;
};
/*
@@ -692,7 +701,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore);
void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore,
- struct amdgpu_fence *fence);
+ struct fence *fence);
/*
* Synchronization
@@ -700,7 +709,8 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_sync {
struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS];
- struct amdgpu_fence *last_vm_update;
+ DECLARE_HASHTABLE(fences, 4);
+ struct fence *last_vm_update;
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
@@ -712,8 +722,10 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
void *owner);
int amdgpu_sync_rings(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
+struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+int amdgpu_sync_wait(struct amdgpu_sync *sync);
void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct amdgpu_fence *fence);
+ struct fence *fence);
/*
* GART structures, functions & helpers
@@ -871,7 +883,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ibs,
unsigned num_ibs,
- int (*free_job)(struct amdgpu_cs_parser *),
+ int (*free_job)(struct amdgpu_job *),
void *owner,
struct fence **fence);
@@ -879,7 +891,7 @@ struct amdgpu_ring {
struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv;
- struct amd_gpu_scheduler *scheduler;
+ struct amd_gpu_scheduler sched;
spinlock_t fence_lock;
struct mutex *ring_lock;
@@ -957,7 +969,7 @@ struct amdgpu_vm_id {
unsigned id;
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
- struct amdgpu_fence *flushed_updates;
+ struct fence *flushed_updates;
/* last use of vmid */
struct amdgpu_fence *last_id_use;
};
@@ -1042,7 +1054,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct fence *fence, uint64_t queued_seq);
+ struct fence *fence);
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq);
@@ -1078,8 +1090,6 @@ struct amdgpu_bo_list {
};
struct amdgpu_bo_list *
-amdgpu_bo_list_clone(struct amdgpu_bo_list *list);
-struct amdgpu_bo_list *
amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
@@ -1192,8 +1202,6 @@ struct amdgpu_gfx {
struct amdgpu_irq_src priv_inst_irq;
/* gfx status */
uint32_t gfx_current_status;
- /* sync signal for const engine */
- unsigned ce_sync_offs;
/* ce ram size*/
unsigned ce_ram_size;
};
@@ -1210,6 +1218,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
void amdgpu_ring_free_size(struct amdgpu_ring *ring);
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
@@ -1255,15 +1264,19 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_user_fence uf;
+};
- struct amdgpu_ring *ring;
- struct mutex job_lock;
- struct work_struct job_work;
- int (*prepare_job)(struct amdgpu_cs_parser *sched_job);
- int (*run_job)(struct amdgpu_cs_parser *sched_job);
- int (*free_job)(struct amdgpu_cs_parser *sched_job);
- struct amd_sched_fence *s_fence;
+struct amdgpu_job {
+ struct amd_sched_job base;
+ struct amdgpu_device *adev;
+ struct amdgpu_ib *ibs;
+ uint32_t num_ibs;
+ struct mutex job_lock;
+ struct amdgpu_user_fence uf;
+ int (*free_job)(struct amdgpu_job *job);
};
+#define to_amdgpu_job(sched_job) \
+ container_of((sched_job), struct amdgpu_job, base)
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
{
@@ -1659,7 +1672,6 @@ struct amdgpu_uvd {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
- void *saved_bo;
atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
struct delayed_work idle_work;
@@ -1703,6 +1715,7 @@ struct amdgpu_sdma {
uint32_t feature_version;
struct amdgpu_ring ring;
+ bool burst_nop;
};
/*
@@ -2051,7 +2064,7 @@ struct amdgpu_device {
struct amdgpu_gfx gfx;
/* sdma */
- struct amdgpu_sdma sdma[2];
+ struct amdgpu_sdma sdma[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_irq_src sdma_trap_irq;
struct amdgpu_irq_src sdma_illegal_inst_irq;
@@ -2190,6 +2203,21 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
ring->ring_free_dw--;
}
+static inline struct amdgpu_sdma * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_SDMA_INSTANCES; i++)
+ if (&adev->sdma[i].ring == ring)
+ break;
+
+ if (i < AMDGPU_MAX_SDMA_INSTANCES)
+ return &adev->sdma[i];
+ else
+ return NULL;
+}
+
/*
* ASICs macro.
*/
@@ -2241,8 +2269,8 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
-#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b))
-#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
#define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
#define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev))
@@ -2343,7 +2371,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync);
void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm,
- struct amdgpu_fence *updates);
+ struct fence *updates);
void amdgpu_vm_fence(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_fence *fence);
@@ -2373,7 +2401,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
uint64_t addr);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
-
+int amdgpu_vm_free_job(struct amdgpu_job *job);
/*
* functions used by amdgpu_encoder.c
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 496ed2192eba..84d68d658f8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -183,7 +183,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
return -ENOMEM;
r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo);
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
if (r) {
dev_err(rdev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 759482e4300d..cd639c362df3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{
unsigned long start_jiffies;
unsigned long end_jiffies;
- struct amdgpu_fence *fence = NULL;
+ struct fence *fence = NULL;
int i, r;
start_jiffies = jiffies;
@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence);
if (r)
goto exit_do_move;
- r = fence_wait(&fence->base, false);
+ r = fence_wait(fence, false);
if (r)
goto exit_do_move;
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
}
end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
if (fence)
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
return r;
}
@@ -79,7 +79,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
int time;
n = AMDGPU_BENCHMARK_ITERATIONS;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, &sobj);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL,
+ NULL, &sobj);
if (r) {
goto out_cleanup;
}
@@ -91,7 +92,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
if (r) {
goto out_cleanup;
}
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, &dobj);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL,
+ NULL, &dobj);
if (r) {
goto out_cleanup;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7eed523bf28f..f82a2dd83874 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -62,39 +62,6 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
return 0;
}
-struct amdgpu_bo_list *
-amdgpu_bo_list_clone(struct amdgpu_bo_list *list)
-{
- struct amdgpu_bo_list *result;
- unsigned i;
-
- result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
- if (!result)
- return NULL;
-
- result->array = drm_calloc_large(list->num_entries,
- sizeof(struct amdgpu_bo_list_entry));
- if (!result->array) {
- kfree(result);
- return NULL;
- }
-
- mutex_init(&result->lock);
- result->gds_obj = list->gds_obj;
- result->gws_obj = list->gws_obj;
- result->oa_obj = list->oa_obj;
- result->has_userptr = list->has_userptr;
- result->num_entries = list->num_entries;
-
- memcpy(result->array, list->array, list->num_entries *
- sizeof(struct amdgpu_bo_list_entry));
-
- for (i = 0; i < result->num_entries; ++i)
- amdgpu_bo_ref(result->array[i].robj);
-
- return result;
-}
-
static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
{
struct amdgpu_bo_list *list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 6b1243f9f86d..1c3fc99c5465 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -86,7 +86,7 @@ static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem,
struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages);
ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
- AMDGPU_GEM_DOMAIN_GTT, 0, sg, &bo);
+ AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo);
if (ret)
return ret;
ret = amdgpu_bo_reserve(bo, false);
@@ -197,7 +197,8 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device,
ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE,
true, domain, flags,
- NULL, &placement, &obj);
+ NULL, &placement, NULL,
+ &obj);
if (ret) {
DRM_ERROR("(%d) bo create failed\n", ret);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 27df17a0e620..89c3dd62ba21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -75,6 +75,11 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+ /* Don't try to start link training before we
+ * have the dpcd */
+ if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ return;
+
/* set it to OFF so that drm_helper_connector_dpms()
* won't return immediately since the current state
* is ON at this point.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e4424b4db5d3..749420f1ea6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -126,19 +126,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0;
}
-static void amdgpu_job_work_func(struct work_struct *work)
-{
- struct amdgpu_cs_parser *sched_job =
- container_of(work, struct amdgpu_cs_parser,
- job_work);
- mutex_lock(&sched_job->job_lock);
- if (sched_job->free_job)
- sched_job->free_job(sched_job);
- mutex_unlock(&sched_job->job_lock);
- /* after processing job, free memory */
- fence_put(&sched_job->s_fence->base);
- kfree(sched_job);
-}
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx,
@@ -157,10 +144,6 @@ struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
parser->ctx = ctx;
parser->ibs = ibs;
parser->num_ibs = num_ibs;
- if (amdgpu_enable_scheduler) {
- mutex_init(&parser->job_lock);
- INIT_WORK(&parser->job_work, amdgpu_job_work_func);
- }
for (i = 0; i < num_ibs; i++)
ibs[i].ctx = ctx;
@@ -171,56 +154,41 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
- uint64_t *chunk_array = NULL;
+ uint64_t *chunk_array;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_bo_list *bo_list = NULL;
unsigned size, i;
- int r = 0;
+ int ret;
- if (!cs->in.num_chunks)
- goto out;
+ if (cs->in.num_chunks == 0)
+ return 0;
+
+ chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+ if (!chunk_array)
+ return -ENOMEM;
p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
if (!p->ctx) {
- r = -EINVAL;
- goto out;
- }
- bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
- if (!amdgpu_enable_scheduler)
- p->bo_list = bo_list;
- else {
- if (bo_list && !bo_list->has_userptr) {
- p->bo_list = amdgpu_bo_list_clone(bo_list);
- amdgpu_bo_list_put(bo_list);
- if (!p->bo_list)
- return -ENOMEM;
- } else if (bo_list && bo_list->has_userptr)
- p->bo_list = bo_list;
- else
- p->bo_list = NULL;
+ ret = -EINVAL;
+ goto free_chunk;
}
+ p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+
/* get chunks */
INIT_LIST_HEAD(&p->validated);
- chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
- if (chunk_array == NULL) {
- r = -ENOMEM;
- goto out;
- }
-
chunk_array_user = (uint64_t __user *)(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) {
- r = -EFAULT;
- goto out;
+ ret = -EFAULT;
+ goto put_bo_list;
}
p->nchunks = cs->in.num_chunks;
p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
GFP_KERNEL);
- if (p->chunks == NULL) {
- r = -ENOMEM;
- goto out;
+ if (!p->chunks) {
+ ret = -ENOMEM;
+ goto put_bo_list;
}
for (i = 0; i < p->nchunks; i++) {
@@ -231,8 +199,9 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
chunk_ptr = (void __user *)chunk_array[i];
if (copy_from_user(&user_chunk, chunk_ptr,
sizeof(struct drm_amdgpu_cs_chunk))) {
- r = -EFAULT;
- goto out;
+ ret = -EFAULT;
+ i--;
+ goto free_partial_kdata;
}
p->chunks[i].chunk_id = user_chunk.chunk_id;
p->chunks[i].length_dw = user_chunk.length_dw;
@@ -243,13 +212,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
if (p->chunks[i].kdata == NULL) {
- r = -ENOMEM;
- goto out;
+ ret = -ENOMEM;
+ i--;
+ goto free_partial_kdata;
}
size *= sizeof(uint32_t);
if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
- r = -EFAULT;
- goto out;
+ ret = -EFAULT;
+ goto free_partial_kdata;
}
switch (p->chunks[i].chunk_id) {
@@ -269,15 +239,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
gobj = drm_gem_object_lookup(p->adev->ddev,
p->filp, handle);
if (gobj == NULL) {
- r = -EINVAL;
- goto out;
+ ret = -EINVAL;
+ goto free_partial_kdata;
}
p->uf.bo = gem_to_amdgpu_bo(gobj);
p->uf.offset = fence_data->offset;
} else {
- r = -EINVAL;
- goto out;
+ ret = -EINVAL;
+ goto free_partial_kdata;
}
break;
@@ -285,19 +255,35 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
default:
- r = -EINVAL;
- goto out;
+ ret = -EINVAL;
+ goto free_partial_kdata;
}
}
- p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!p->ibs)
- r = -ENOMEM;
+ p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
+ if (!p->ibs) {
+ ret = -ENOMEM;
+ goto free_all_kdata;
+ }
-out:
kfree(chunk_array);
- return r;
+ return 0;
+
+free_all_kdata:
+ i = p->nchunks - 1;
+free_partial_kdata:
+ for (; i >= 0; i--)
+ drm_free_large(p->chunks[i].kdata);
+ kfree(p->chunks);
+put_bo_list:
+ if (p->bo_list)
+ amdgpu_bo_list_put(p->bo_list);
+ amdgpu_ctx_put(p->ctx);
+free_chunk:
+ kfree(chunk_array);
+
+ return ret;
}
/* Returns how many bytes TTM can move per IB.
@@ -352,25 +338,17 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
return max(bytes_moved_threshold, 1024*1024ull);
}
-int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
+int amdgpu_cs_list_validate(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct list_head *validated)
{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_device *adev = p->adev;
struct amdgpu_bo_list_entry *lobj;
- struct list_head duplicates;
struct amdgpu_bo *bo;
u64 bytes_moved = 0, initial_bytes_moved;
u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
int r;
- INIT_LIST_HEAD(&duplicates);
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
- if (unlikely(r != 0)) {
- return r;
- }
-
- list_for_each_entry(lobj, &p->validated, tv.head) {
+ list_for_each_entry(lobj, validated, tv.head) {
bo = lobj->robj;
if (!bo->pin_count) {
u32 domain = lobj->prefered_domains;
@@ -385,7 +363,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
* into account. We don't want to disallow buffer moves
* completely.
*/
- if (current_domain != AMDGPU_GEM_DOMAIN_CPU &&
+ if ((lobj->allowed_domains & current_domain) != 0 &&
(domain & current_domain) == 0 && /* will be moved */
bytes_moved > bytes_moved_threshold) {
/* don't move it */
@@ -404,7 +382,6 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
domain = lobj->allowed_domains;
goto retry;
}
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
return r;
}
}
@@ -417,6 +394,7 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_cs_buckets buckets;
+ struct list_head duplicates;
bool need_mmap_lock = false;
int i, r;
@@ -436,8 +414,22 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
if (need_mmap_lock)
down_read(&current->mm->mmap_sem);
- r = amdgpu_cs_list_validate(p);
+ INIT_LIST_HEAD(&duplicates);
+ r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
+ if (unlikely(r != 0))
+ goto error_reserve;
+
+ r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
+ if (r)
+ goto error_validate;
+ r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);
+
+error_validate:
+ if (r)
+ ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+error_reserve:
if (need_mmap_lock)
up_read(&current->mm->mmap_sem);
@@ -498,25 +490,24 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
unsigned i;
if (parser->ctx)
amdgpu_ctx_put(parser->ctx);
- if (parser->bo_list) {
- if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr)
- amdgpu_bo_list_free(parser->bo_list);
- else
- amdgpu_bo_list_put(parser->bo_list);
- }
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
- if (parser->ibs)
- for (i = 0; i < parser->num_ibs; i++)
- amdgpu_ib_free(parser->adev, &parser->ibs[i]);
- kfree(parser->ibs);
- if (parser->uf.bo)
- drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
-
if (!amdgpu_enable_scheduler)
- kfree(parser);
+ {
+ if (parser->ibs)
+ for (i = 0; i < parser->num_ibs; i++)
+ amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+ kfree(parser->ibs);
+ if (parser->uf.bo)
+ drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+ }
+
+ kfree(parser);
}
/**
@@ -533,12 +524,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
amdgpu_cs_parser_fini_late(parser);
}
-static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job)
-{
- amdgpu_cs_parser_fini_late(sched_job);
- return 0;
-}
-
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm)
{
@@ -810,68 +795,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
return 0;
}
-static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job)
+static int amdgpu_cs_free_job(struct amdgpu_job *job)
{
- int r, i;
- struct amdgpu_cs_parser *parser = sched_job;
- struct amdgpu_device *adev = sched_job->adev;
- bool reserved_buffers = false;
-
- r = amdgpu_cs_parser_relocs(parser);
- if (r) {
- if (r != -ERESTARTSYS) {
- if (r == -ENOMEM)
- DRM_ERROR("Not enough memory for command submission!\n");
- else
- DRM_ERROR("Failed to process the buffer list %d!\n", r);
- }
- }
-
- if (!r) {
- reserved_buffers = true;
- r = amdgpu_cs_ib_fill(adev, parser);
- }
- if (!r) {
- r = amdgpu_cs_dependencies(adev, parser);
- if (r)
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- }
- if (r) {
- amdgpu_cs_parser_fini(parser, r, reserved_buffers);
- return r;
- }
-
- for (i = 0; i < parser->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
-
- r = amdgpu_cs_ib_vm_chunk(adev, parser);
- return r;
-}
-
-static struct amdgpu_ring *amdgpu_cs_parser_get_ring(
- struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
-{
- int i, r;
-
- struct amdgpu_cs_chunk *chunk;
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_ring *ring;
- for (i = 0; i < parser->nchunks; i++) {
- chunk = &parser->chunks[i];
- chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring,
- &ring);
- if (r)
- return NULL;
- break;
- }
- return ring;
+ int i;
+ if (job->ibs)
+ for (i = 0; i < job->num_ibs; i++)
+ amdgpu_ib_free(job->adev, &job->ibs[i]);
+ kfree(job->ibs);
+ if (job->uf.bo)
+ drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
+ return 0;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -879,7 +812,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser *parser;
- int r;
+ bool reserved_buffers = false;
+ int i, r;
down_read(&adev->exclusive_lock);
if (!adev->accel_working) {
@@ -893,50 +827,85 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_init(parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
- amdgpu_cs_parser_fini(parser, r, false);
+ kfree(parser);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
- if (amdgpu_enable_scheduler && parser->num_ibs) {
- struct amdgpu_ring * ring =
- amdgpu_cs_parser_get_ring(adev, parser);
- r = amdgpu_cs_parser_prepare_job(parser);
+ r = amdgpu_cs_parser_relocs(parser);
+ if (r == -ENOMEM)
+ DRM_ERROR("Not enough memory for command submission!\n");
+ else if (r && r != -ERESTARTSYS)
+ DRM_ERROR("Failed to process the buffer list %d!\n", r);
+ else if (!r) {
+ reserved_buffers = true;
+ r = amdgpu_cs_ib_fill(adev, parser);
+ }
+
+ if (!r) {
+ r = amdgpu_cs_dependencies(adev, parser);
if (r)
- goto out;
- parser->ring = ring;
- parser->free_job = amdgpu_cs_parser_free_job;
- mutex_lock(&parser->job_lock);
- r = amd_sched_push_job(ring->scheduler,
- &parser->ctx->rings[ring->idx].entity,
- parser,
- &parser->s_fence);
+ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+ }
+
+ if (r)
+ goto out;
+
+ for (i = 0; i < parser->num_ibs; i++)
+ trace_amdgpu_cs(parser, i);
+
+ r = amdgpu_cs_ib_vm_chunk(adev, parser);
+ if (r)
+ goto out;
+
+ if (amdgpu_enable_scheduler && parser->num_ibs) {
+ struct amdgpu_job *job;
+ struct amdgpu_ring * ring = parser->ibs->ring;
+ job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+ job->base.sched = &ring->sched;
+ job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
+ job->adev = parser->adev;
+ job->ibs = parser->ibs;
+ job->num_ibs = parser->num_ibs;
+ job->base.owner = parser->filp;
+ mutex_init(&job->job_lock);
+ if (job->ibs[job->num_ibs - 1].user) {
+ memcpy(&job->uf, &parser->uf,
+ sizeof(struct amdgpu_user_fence));
+ job->ibs[job->num_ibs - 1].user = &job->uf;
+ }
+
+ job->free_job = amdgpu_cs_free_job;
+ mutex_lock(&job->job_lock);
+ r = amd_sched_entity_push_job(&job->base);
if (r) {
- mutex_unlock(&parser->job_lock);
+ mutex_unlock(&job->job_lock);
+ amdgpu_cs_free_job(job);
+ kfree(job);
goto out;
}
- parser->ibs[parser->num_ibs - 1].sequence =
+ cs->out.handle =
amdgpu_ctx_add_fence(parser->ctx, ring,
- &parser->s_fence->base,
- parser->s_fence->v_seq);
- cs->out.handle = parser->s_fence->v_seq;
+ &job->base.s_fence->base);
+ parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
- &parser->s_fence->base);
+ &job->base.s_fence->base);
- mutex_unlock(&parser->job_lock);
+ mutex_unlock(&job->job_lock);
+ amdgpu_cs_parser_fini_late(parser);
up_read(&adev->exclusive_lock);
return 0;
}
- r = amdgpu_cs_parser_prepare_job(parser);
- if (r)
- goto out;
cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out:
- amdgpu_cs_parser_fini(parser, r, true);
+ amdgpu_cs_parser_fini(parser, r, reserved_buffers);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 08bc7722ddb8..e0b80ccdfe8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -43,10 +43,10 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel,
for (i = 0; i < adev->num_rings; i++) {
struct amd_sched_rq *rq;
if (kernel)
- rq = &adev->rings[i]->scheduler->kernel_rq;
+ rq = &adev->rings[i]->sched.kernel_rq;
else
- rq = &adev->rings[i]->scheduler->sched_rq;
- r = amd_sched_entity_init(adev->rings[i]->scheduler,
+ rq = &adev->rings[i]->sched.sched_rq;
+ r = amd_sched_entity_init(&adev->rings[i]->sched,
&ctx->rings[i].entity,
rq, amdgpu_sched_jobs);
if (r)
@@ -55,7 +55,7 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel,
if (i < adev->num_rings) {
for (j = 0; j < i; j++)
- amd_sched_entity_fini(adev->rings[j]->scheduler,
+ amd_sched_entity_fini(&adev->rings[j]->sched,
&ctx->rings[j].entity);
kfree(ctx);
return r;
@@ -75,7 +75,7 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
if (amdgpu_enable_scheduler) {
for (i = 0; i < adev->num_rings; i++)
- amd_sched_entity_fini(adev->rings[i]->scheduler,
+ amd_sched_entity_fini(&adev->rings[i]->sched,
&ctx->rings[i].entity);
}
}
@@ -229,17 +229,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
}
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct fence *fence, uint64_t queued_seq)
+ struct fence *fence)
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
- uint64_t seq = 0;
+ uint64_t seq = cring->sequence;
unsigned idx = 0;
struct fence *other = NULL;
- if (amdgpu_enable_scheduler)
- seq = queued_seq;
- else
- seq = cring->sequence;
idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
other = cring->fences[idx];
if (other) {
@@ -253,8 +249,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
spin_lock(&ctx->ring_lock);
cring->fences[idx] = fence;
- if (!amdgpu_enable_scheduler)
- cring->sequence++;
+ cring->sequence++;
spin_unlock(&ctx->ring_lock);
fence_put(other);
@@ -267,21 +262,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
struct fence *fence;
- uint64_t queued_seq;
spin_lock(&ctx->ring_lock);
- if (amdgpu_enable_scheduler)
- queued_seq = amd_sched_next_queued_seq(&cring->entity);
- else
- queued_seq = cring->sequence;
- if (seq >= queued_seq) {
+ if (seq >= cring->sequence) {
spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL);
}
- if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) {
+ if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) {
spin_unlock(&ctx->ring_lock);
return NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 42d1a22c1199..6068d8207d10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -244,8 +244,9 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev)
if (adev->vram_scratch.robj == NULL) {
r = amdgpu_bo_create(adev, AMDGPU_GPU_PAGE_SIZE,
- PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0,
- NULL, &adev->vram_scratch.robj);
+ PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, &adev->vram_scratch.robj);
if (r) {
return r;
}
@@ -448,7 +449,8 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
if (adev->wb.wb_obj == NULL) {
r = amdgpu_bo_create(adev, AMDGPU_MAX_WB * 4, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL, &adev->wb.wb_obj);
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+ &adev->wb.wb_obj);
if (r) {
dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
return r;
@@ -1649,9 +1651,11 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
drm_kms_helper_poll_disable(dev);
/* turn off display hw */
+ drm_modeset_lock_all(dev);
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
}
+ drm_modeset_unlock_all(dev);
/* unpin the front buffers */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -1746,9 +1750,11 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
if (fbcon) {
drm_helper_resume_force_mode(dev);
/* turn on display hw */
+ drm_modeset_lock_all(dev);
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
+ drm_modeset_unlock_all(dev);
}
drm_kms_helper_poll_enable(dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index e3d70772b531..9b34a3410c32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -745,7 +745,8 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
*
*/
int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int flags,
- int *vpos, int *hpos, ktime_t *stime, ktime_t *etime)
+ int *vpos, int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode)
{
u32 vbl = 0, position = 0;
int vbl_start, vbl_end, vtotal, ret = 0;
@@ -781,7 +782,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
}
else {
/* No: Fake something reasonable which gives at least ok results. */
- vbl_start = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vdisplay;
+ vbl_start = mode->crtc_vdisplay;
vbl_end = 0;
}
@@ -797,7 +798,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
/* Inside "upper part" of vblank area? Apply corrective offset if so: */
if (in_vbl && (*vpos >= vbl_start)) {
- vtotal = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vtotal;
+ vtotal = mode->crtc_vtotal;
*vpos = *vpos - vtotal;
}
@@ -819,8 +820,8 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
* We only do this if DRM_CALLED_FROM_VBLIRQ.
*/
if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) {
- vbl_start = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vdisplay;
- vtotal = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vtotal;
+ vbl_start = mode->crtc_vdisplay;
+ vtotal = mode->crtc_vtotal;
if (vbl_start - *vpos < vtotal / 100) {
*vpos -= vtotal;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e6fa27805207..adb48353f2e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -49,9 +49,10 @@
/*
* KMS wrapper.
* - 3.0.0 - initial driver
+ * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 0
+#define KMS_DRIVER_MINOR 1
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -78,6 +79,7 @@ int amdgpu_exp_hw_support = 0;
int amdgpu_enable_scheduler = 0;
int amdgpu_sched_jobs = 16;
int amdgpu_sched_hw_submission = 2;
+int amdgpu_enable_semaphores = 1;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -151,6 +153,9 @@ module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
+MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable (default), 0 = disable)");
+module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
+
static struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 81b821247dde..8a122b1b7786 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -126,8 +126,8 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
aligned_size = ALIGN(size, PAGE_SIZE);
ret = amdgpu_gem_object_create(adev, aligned_size, 0,
AMDGPU_GEM_DOMAIN_VRAM,
- 0, true,
- &gobj);
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ true, &gobj);
if (ret) {
printk(KERN_ERR "failed to allocate framebuffer (%d)\n",
aligned_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 98500f1756f7..b3fc26c59787 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -609,9 +609,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
- int i;
+ int i, r;
ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0;
@@ -625,15 +625,19 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
amdgpu_fence_check_lockup);
ring->fence_drv.ring = ring;
+ init_waitqueue_head(&ring->fence_drv.fence_queue);
+
if (amdgpu_enable_scheduler) {
- ring->scheduler = amd_sched_create((void *)ring->adev,
- &amdgpu_sched_ops,
- ring->idx, 5, 0,
- amdgpu_sched_hw_submission);
- if (!ring->scheduler)
- DRM_ERROR("Failed to create scheduler on ring %d.\n",
- ring->idx);
+ r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
+ amdgpu_sched_hw_submission, ring->name);
+ if (r) {
+ DRM_ERROR("Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
}
+
+ return 0;
}
/**
@@ -681,8 +685,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
wake_up_all(&ring->fence_drv.fence_queue);
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
- if (ring->scheduler)
- amd_sched_destroy(ring->scheduler);
+ amd_sched_fini(&ring->sched);
ring->fence_drv.initialized = false;
}
mutex_unlock(&adev->ring_lock);
@@ -836,16 +839,15 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}
-static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences)
+static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
{
int idx;
- struct amdgpu_fence *fence;
+ struct fence *fence;
- idx = 0;
- for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+ for (idx = 0; idx < count; ++idx) {
fence = fences[idx];
if (fence) {
- if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
return true;
}
}
@@ -867,33 +869,48 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
signed long t)
{
- struct amdgpu_fence *array[AMDGPU_MAX_RINGS];
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_device *adev = fence->ring->adev;
- memset(&array[0], 0, sizeof(array));
- array[0] = fence;
-
- return amdgpu_fence_wait_any(adev, array, intr, t);
+ return amdgpu_fence_wait_any(adev, &f, 1, intr, t);
}
-/* wait until any fence in array signaled */
+/**
+ * amdgpu_fence_wait_any - wait on an array of fences with a timeout
+ *
+ * @adev: amdgpu device
+ * @array: array of fence pointers to wait on; NULL entries are skipped
+ * @count: number of entries in @array
+ * @intr: whether the sleeping task is interruptible
+ * @t: timeout to wait
+ *
+ * Returns when any fence in @array is signaled or the timeout expires.
+ */
signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
- struct amdgpu_fence **array, bool intr, signed long t)
+ struct fence **array, uint32_t count,
+ bool intr, signed long t)
{
- long idx = 0;
- struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS];
- struct amdgpu_fence *fence;
+ struct amdgpu_wait_cb *cb;
+ struct fence *fence;
+ unsigned idx;
BUG_ON(!array);
- for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+ cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
+ if (cb == NULL) {
+ t = -ENOMEM;
+ goto err_free_cb;
+ }
+
+ for (idx = 0; idx < count; ++idx) {
fence = array[idx];
if (fence) {
cb[idx].task = current;
- if (fence_add_callback(&fence->base,
- &cb[idx].base, amdgpu_fence_wait_cb))
- return t; /* return if fence is already signaled */
+ if (fence_add_callback(fence,
+ &cb[idx].base, amdgpu_fence_wait_cb)) {
+ /* The fence is already signaled */
+ goto fence_rm_cb;
+ }
}
}
@@ -907,7 +924,7 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
* amdgpu_test_signaled_any must be called after
* set_current_state to prevent a race with wake_up_process
*/
- if (amdgpu_test_signaled_any(array))
+ if (amdgpu_test_signaled_any(array, count))
break;
if (adev->needs_reset) {
@@ -923,13 +940,16 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
__set_current_state(TASK_RUNNING);
- idx = 0;
- for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+fence_rm_cb:
+ for (idx = 0; idx < count; ++idx) {
fence = array[idx];
- if (fence)
- fence_remove_callback(&fence->base, &cb[idx].base);
+ if (fence && cb[idx].base.func)
+ fence_remove_callback(fence, &cb[idx].base);
}
+err_free_cb:
+ kfree(cb);
+
return t;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index e02db0b2e839..7312d729d300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -125,8 +125,9 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
if (adev->gart.robj == NULL) {
r = amdgpu_bo_create(adev, adev->gart.table_size,
- PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0,
- NULL, &adev->gart.robj);
+ PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, &adev->gart.robj);
if (r) {
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4afc507820c0..7297ca3a0ba7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -69,7 +69,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
}
}
retry:
- r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, flags, NULL, &robj);
+ r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
+ flags, NULL, NULL, &robj);
if (r) {
if (r != -ERESTARTSYS) {
if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
@@ -426,6 +427,10 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
&args->data.data_size_bytes,
&args->data.flags);
} else if (args->op == AMDGPU_GEM_METADATA_OP_SET_METADATA) {
+ if (args->data.data_size_bytes > sizeof(args->data.data)) {
+ r = -EINVAL;
+ goto unreserve;
+ }
r = amdgpu_bo_set_tiling_flags(robj, args->data.tiling_info);
if (!r)
r = amdgpu_bo_set_metadata(robj, args->data.data,
@@ -433,6 +438,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
args->data.flags);
}
+unreserve:
amdgpu_bo_unreserve(robj);
out:
drm_gem_object_unreference_unlocked(gobj);
@@ -454,11 +460,12 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct ttm_validate_buffer tv, *entry;
struct amdgpu_bo_list_entry *vm_bos;
struct ww_acquire_ctx ticket;
- struct list_head list;
+ struct list_head list, duplicates;
unsigned domain;
int r;
INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&duplicates);
tv.bo = &bo_va->bo->tbo;
tv.shared = true;
@@ -468,7 +475,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (!vm_bos)
return;
- r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+ /* Provide duplicates to avoid -EALREADY */
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r)
goto error_free;
@@ -615,6 +623,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
info.domains = robj->initial_domain;
info.domain_flags = robj->flags;
+ amdgpu_bo_unreserve(robj);
if (copy_to_user(out, &info, sizeof(info)))
r = -EFAULT;
break;
@@ -622,17 +631,19 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
case AMDGPU_GEM_OP_SET_PLACEMENT:
if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
r = -EPERM;
+ amdgpu_bo_unreserve(robj);
break;
}
robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU);
+ amdgpu_bo_unreserve(robj);
break;
default:
+ amdgpu_bo_unreserve(robj);
r = -EINVAL;
}
- amdgpu_bo_unreserve(robj);
out:
drm_gem_object_unreference_unlocked(gobj);
return r;
@@ -648,12 +659,13 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
int r;
args->pitch = amdgpu_align_pitch(adev, args->width, args->bpp, 0) * ((args->bpp + 1) / 8);
- args->size = args->pitch * args->height;
+ args->size = (u64)args->pitch * args->height;
args->size = ALIGN(args->size, PAGE_SIZE);
r = amdgpu_gem_object_create(adev, args->size, 0,
AMDGPU_GEM_DOMAIN_VRAM,
- 0, ttm_bo_type_device,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ ttm_bo_type_device,
&gobj);
if (r)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 5104e64e9ad8..c439735ee670 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
if (!vm)
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
- else
- ib->gpu_addr = 0;
-
- } else {
- ib->sa_bo = NULL;
- ib->ptr = NULL;
- ib->gpu_addr = 0;
}
amdgpu_sync_create(&ib->sync);
ib->ring = ring;
- ib->fence = NULL;
- ib->user = NULL;
ib->vm = vm;
- ib->ctx = NULL;
- ib->gds_base = 0;
- ib->gds_size = 0;
- ib->gws_base = 0;
- ib->gws_size = 0;
- ib->oa_base = 0;
- ib->oa_size = 0;
- ib->flags = 0;
return 0;
}
@@ -110,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
*/
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
{
- amdgpu_sync_free(adev, &ib->sync, ib->fence);
- amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence);
+ amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
+ amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
amdgpu_fence_unref(&ib->fence);
}
@@ -143,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm;
- uint64_t sequence;
unsigned i;
int r = 0;
@@ -158,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
dev_err(adev->dev, "couldn't schedule ib\n");
return -EINVAL;
}
-
+ r = amdgpu_sync_wait(&ibs->sync);
+ if (r) {
+ dev_err(adev->dev, "IB sync failed (%d).\n", r);
+ return r;
+ }
r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
@@ -216,12 +202,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
return r;
}
- sequence = amdgpu_enable_scheduler ? ib->sequence : 0;
-
if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
- &ib->fence->base,
- sequence);
+ &ib->fence->base);
/* wrap the last IB with fence */
if (ib->user) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 90044b254404..534fc04e80fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -43,7 +43,7 @@ static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, adev->irq.ih.ring_size,
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0,
- NULL, &adev->irq.ih.ring_obj);
+ NULL, NULL, &adev->irq.ih.ring_obj);
if (r) {
DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r);
return r;
@@ -98,18 +98,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
- adev->irq.ih.ring = kzalloc(adev->irq.ih.ring_size + 8, GFP_KERNEL);
+ adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
+ adev->irq.ih.ring_size + 8,
+ &adev->irq.ih.rb_dma_addr);
if (adev->irq.ih.ring == NULL)
return -ENOMEM;
- adev->irq.ih.rb_dma_addr = pci_map_single(adev->pdev,
- (void *)adev->irq.ih.ring,
- adev->irq.ih.ring_size,
- PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(adev->pdev, adev->irq.ih.rb_dma_addr)) {
- dev_err(&adev->pdev->dev, "Failed to DMA MAP the IH RB page\n");
- kfree((void *)adev->irq.ih.ring);
- return -ENOMEM;
- }
+ memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
}
@@ -149,9 +143,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
- pci_unmap_single(adev->pdev, adev->irq.ih.rb_dma_addr,
- adev->irq.ih.ring_size + 8, PCI_DMA_BIDIRECTIONAL);
- kfree((void *)adev->irq.ih.ring);
+ pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
+ (void *)adev->irq.ih.ring,
+ adev->irq.ih.rb_dma_addr);
adev->irq.ih.ring = NULL;
}
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 0aba8e9bc8a0..7c42ff670080 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -140,7 +140,7 @@ void amdgpu_irq_preinstall(struct drm_device *dev)
*/
int amdgpu_irq_postinstall(struct drm_device *dev)
{
- dev->max_vblank_count = 0x001fffff;
+ dev->max_vblank_count = 0x00ffffff;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 87da6b1848fd..275f1c3dbba0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -390,7 +390,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
- unsigned n, alloc_size = info->read_mmr_reg.count * 4;
+ unsigned n, alloc_size;
uint32_t *regs;
unsigned se_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
@@ -406,9 +406,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
sh_num = 0xffffffff;
- regs = kmalloc(alloc_size, GFP_KERNEL);
+ regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
if (!regs)
return -ENOMEM;
+ alloc_size = info->read_mmr_reg.count * sizeof(*regs);
for (i = 0; i < info->read_mmr_reg.count; i++)
if (amdgpu_asic_read_register(adev, se_num, sh_num,
@@ -560,6 +561,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (!fpriv)
return;
+ amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+
amdgpu_vm_fini(adev, &fpriv->vm);
idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
@@ -568,8 +571,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
idr_destroy(&fpriv->bo_list_handles);
mutex_destroy(&fpriv->bo_list_lock);
- amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
-
kfree(fpriv);
file_priv->driver_priv = NULL;
}
@@ -681,7 +682,7 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, int crtc,
/* Helper routine in DRM core does all the work: */
return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc, max_error,
vblank_time, flags,
- drmcrtc, &drmcrtc->hwmode);
+ &drmcrtc->hwmode);
}
const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 64efe5b52e65..2b03425f9740 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -543,7 +543,8 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
unsigned int flags,
int *vpos, int *hpos, ktime_t *stime,
- ktime_t *etime);
+ ktime_t *etime,
+ const struct drm_display_mode *mode);
int amdgpu_framebuffer_init(struct drm_device *dev,
struct amdgpu_framebuffer *rfb,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 57adcad2f7ba..1a7708f365f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -127,7 +127,7 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
placements[c].fpfn =
adev->mc.visible_vram_size >> PAGE_SHIFT;
placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
- TTM_PL_FLAG_VRAM;
+ TTM_PL_FLAG_VRAM | TTM_PL_FLAG_TOPDOWN;
}
placements[c].fpfn = 0;
placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
@@ -215,6 +215,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
bool kernel, u32 domain, u64 flags,
struct sg_table *sg,
struct ttm_placement *placement,
+ struct reservation_object *resv,
struct amdgpu_bo **bo_ptr)
{
struct amdgpu_bo *bo;
@@ -261,7 +262,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
/* Kernel allocation are uninterruptible */
r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
&bo->placement, page_align, !kernel, NULL,
- acc_size, sg, NULL, &amdgpu_ttm_bo_destroy);
+ acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
if (unlikely(r != 0)) {
return r;
}
@@ -275,7 +276,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
int amdgpu_bo_create(struct amdgpu_device *adev,
unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags,
- struct sg_table *sg, struct amdgpu_bo **bo_ptr)
+ struct sg_table *sg,
+ struct reservation_object *resv,
+ struct amdgpu_bo **bo_ptr)
{
struct ttm_placement placement = {0};
struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
@@ -286,11 +289,9 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
amdgpu_ttm_placement_init(adev, &placement,
placements, domain, flags);
- return amdgpu_bo_create_restricted(adev, size, byte_align,
- kernel, domain, flags,
- sg,
- &placement,
- bo_ptr);
+ return amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
+ domain, flags, sg, &placement,
+ resv, bo_ptr);
}
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
@@ -535,12 +536,10 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
if (metadata == NULL)
return -EINVAL;
- buffer = kzalloc(metadata_size, GFP_KERNEL);
+ buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
if (buffer == NULL)
return -ENOMEM;
- memcpy(buffer, metadata, metadata_size);
-
kfree(bo->metadata);
bo->metadata_flags = flags;
bo->metadata = buffer;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 238465a9ac55..3c2ff4567798 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -129,12 +129,14 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags,
struct sg_table *sg,
+ struct reservation_object *resv,
struct amdgpu_bo **bo_ptr);
int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags,
struct sg_table *sg,
struct ttm_placement *placement,
+ struct reservation_object *resv,
struct amdgpu_bo **bo_ptr);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
@@ -193,7 +195,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
unsigned size, unsigned align);
void amdgpu_sa_bo_free(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo,
- struct amdgpu_fence *fence);
+ struct fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index d9652fe32d6a..59f735a933a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -61,12 +61,15 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg)
{
+ struct reservation_object *resv = attach->dmabuf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
int ret;
+ ww_mutex_lock(&resv->lock, NULL);
ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
- AMDGPU_GEM_DOMAIN_GTT, 0, sg, &bo);
+ AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo);
+ ww_mutex_unlock(&resv->lock);
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 7d442c51063e..30dce235ddeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -131,6 +131,21 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
return 0;
}
+/**
+ * amdgpu_ring_insert_nop - insert NOP packets
+ * @ring: amdgpu_ring structure holding ring information
+ * @count: the number of NOP packets to insert
+ *
+ * This is the generic insert_nop function for rings except SDMA
+ */
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ uint32_t i; /* same type as @count, so the loop compare is not mixed-sign */
+
+ for (i = 0; i < count; i++)
+ amdgpu_ring_write(ring, ring->nop);
+}
+
/**
* amdgpu_ring_commit - tell the GPU to execute the new
* commands on the ring buffer
@@ -143,10 +158,13 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
*/
void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
+ uint32_t count;
+
/* We pad to match fetch size */
- while (ring->wptr & ring->align_mask) {
- amdgpu_ring_write(ring, ring->nop);
- }
+ count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
+ count %= ring->align_mask + 1;
+ ring->funcs->insert_nop(ring, count);
+
mb();
amdgpu_ring_set_wptr(ring);
}
@@ -339,11 +357,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev;
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
- amdgpu_fence_driver_init_ring(ring);
+ r = amdgpu_fence_driver_init_ring(ring);
+ if (r)
+ return r;
}
- init_waitqueue_head(&ring->fence_drv.fence_queue);
-
r = amdgpu_wb_get(adev, &ring->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
@@ -389,7 +407,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
if (ring->ring_obj == NULL) {
r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0,
- NULL, &ring->ring_obj);
+ NULL, NULL, &ring->ring_obj);
if (r) {
dev_err(adev->dev, "(%d) ring create failed\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index d6398cf45f24..e90712443fe9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -64,8 +64,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&sa_manager->flist[i]);
}
- r = amdgpu_bo_create(adev, size, align, true,
- domain, 0, NULL, &sa_manager->bo);
+ r = amdgpu_bo_create(adev, size, align, true, domain,
+ 0, NULL, NULL, &sa_manager->bo);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
@@ -139,6 +139,25 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
return r;
}
+static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
+{
+ struct amdgpu_fence *a_fence;
+ struct amd_sched_fence *s_fence;
+
+ s_fence = to_amd_sched_fence(f);
+ if (s_fence) { /* recover the amdgpu_ring whose sched member this is */
+ struct amdgpu_ring *ring;
+
+ ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
+ return ring->idx;
+ }
+ /* to_amd_sched_fence() gave NULL: try the amdgpu fence type instead */
+ a_fence = to_amdgpu_fence(f);
+ if (a_fence)
+ return a_fence->ring->idx;
+ return 0; /* neither conversion matched: default to ring index 0 */
+}
+
static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
{
struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
@@ -147,7 +166,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
}
list_del_init(&sa_bo->olist);
list_del_init(&sa_bo->flist);
- amdgpu_fence_unref(&sa_bo->fence);
+ fence_put(sa_bo->fence);
kfree(sa_bo);
}
@@ -161,7 +180,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
if (sa_bo->fence == NULL ||
- !fence_is_signaled(&sa_bo->fence->base)) {
+ !fence_is_signaled(sa_bo->fence)) {
return;
}
amdgpu_sa_bo_remove_locked(sa_bo);
@@ -246,7 +265,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
}
static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_fence **fences,
+ struct fence **fences,
unsigned *tries)
{
struct amdgpu_sa_bo *best_bo = NULL;
@@ -275,7 +294,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
sa_bo = list_first_entry(&sa_manager->flist[i],
struct amdgpu_sa_bo, flist);
- if (!fence_is_signaled(&sa_bo->fence->base)) {
+ if (!fence_is_signaled(sa_bo->fence)) {
fences[i] = sa_bo->fence;
continue;
}
@@ -299,7 +318,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
}
if (best_bo) {
- ++tries[best_bo->fence->ring->idx];
+ uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence);
+ ++tries[idx];
sa_manager->hole = best_bo->olist.prev;
/* we knew that this one is signaled,
@@ -315,7 +335,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align)
{
- struct amdgpu_fence *fences[AMDGPU_MAX_RINGS];
+ struct fence *fences[AMDGPU_MAX_RINGS];
unsigned tries[AMDGPU_MAX_RINGS];
int i, r;
signed long t;
@@ -352,7 +372,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
spin_unlock(&sa_manager->wq.lock);
- t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT);
+ t = amdgpu_fence_wait_any(adev, fences, AMDGPU_MAX_RINGS,
+ false, MAX_SCHEDULE_TIMEOUT);
r = (t > 0) ? 0 : t;
spin_lock(&sa_manager->wq.lock);
/* if we have nothing to wait for block */
@@ -372,7 +393,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
}
void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
struct amdgpu_sa_manager *sa_manager;
@@ -382,10 +403,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
sa_manager = (*sa_bo)->manager;
spin_lock(&sa_manager->wq.lock);
- if (fence && !fence_is_signaled(&fence->base)) {
- (*sa_bo)->fence = amdgpu_fence_ref(fence);
- list_add_tail(&(*sa_bo)->flist,
- &sa_manager->flist[fence->ring->idx]);
+ if (fence && !fence_is_signaled(fence)) {
+ uint32_t idx;
+ (*sa_bo)->fence = fence_get(fence);
+ idx = amdgpu_sa_get_ring_from_fence(fence);
+ list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
} else {
amdgpu_sa_bo_remove_locked(*sa_bo);
}
@@ -395,6 +417,26 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
}
#if defined(CONFIG_DEBUG_FS)
+
+static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m)
+{
+ struct amdgpu_fence *a_fence = to_amdgpu_fence(fence);
+ struct amd_sched_fence *s_fence = to_amd_sched_fence(fence);
+
+ if (a_fence)
+ seq_printf(m, " protected by 0x%016llx on ring %d",
+ a_fence->seq, a_fence->ring->idx);
+
+ if (s_fence) {
+ struct amdgpu_ring *ring;
+
+
+ ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
+ seq_printf(m, " protected by 0x%016x on ring %d",
+ s_fence->base.seqno, ring->idx);
+ }
+}
+
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m)
{
@@ -411,10 +453,8 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
}
seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
soffset, eoffset, eoffset - soffset);
- if (i->fence) {
- seq_printf(m, " protected by 0x%016llx on ring %d",
- i->fence->seq, i->fence->ring->idx);
- }
+ if (i->fence)
+ amdgpu_sa_bo_dump_fence(i->fence, m);
seq_printf(m, "\n");
}
spin_unlock(&sa_manager->wq.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index a86e38158afa..2e946b2cad88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -27,119 +27,87 @@
#include <drm/drmP.h>
#include "amdgpu.h"
-static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity,
- struct amd_sched_job *job)
+static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
{
- int r = 0;
- struct amdgpu_cs_parser *sched_job;
- if (!job || !job->data) {
- DRM_ERROR("job is null\n");
- return -EINVAL;
- }
-
- sched_job = (struct amdgpu_cs_parser *)job->data;
- if (sched_job->prepare_job) {
- r = sched_job->prepare_job(sched_job);
- if (r) {
- DRM_ERROR("Prepare job error\n");
- schedule_work(&sched_job->job_work);
- }
- }
- return r;
+ struct amdgpu_job *job = to_amdgpu_job(sched_job);
+ return amdgpu_sync_get_fence(&job->ibs->sync);
}
-static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity,
- struct amd_sched_job *job)
+static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
{
- int r = 0;
- struct amdgpu_cs_parser *sched_job;
- struct amdgpu_fence *fence;
+ struct amdgpu_fence *fence = NULL;
+ struct amdgpu_job *job;
+ int r;
- if (!job || !job->data) {
+ if (!sched_job) {
DRM_ERROR("job is null\n");
return NULL;
}
- sched_job = (struct amdgpu_cs_parser *)job->data;
- mutex_lock(&sched_job->job_lock);
- r = amdgpu_ib_schedule(sched_job->adev,
- sched_job->num_ibs,
- sched_job->ibs,
- sched_job->filp);
- if (r)
+ job = to_amdgpu_job(sched_job);
+ mutex_lock(&job->job_lock);
+ r = amdgpu_ib_schedule(job->adev,
+ job->num_ibs,
+ job->ibs,
+ job->base.owner);
+ if (r) {
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err;
- fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence);
-
- if (sched_job->run_job) {
- r = sched_job->run_job(sched_job);
- if (r)
- goto err;
}
- mutex_unlock(&sched_job->job_lock);
- return &fence->base;
+ fence = amdgpu_fence_ref(job->ibs[job->num_ibs - 1].fence);
err:
- DRM_ERROR("Run job error\n");
- mutex_unlock(&sched_job->job_lock);
- schedule_work(&sched_job->job_work);
- return NULL;
-}
-
-static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_job *job)
-{
- struct amdgpu_cs_parser *sched_job;
+ if (job->free_job)
+ job->free_job(job);
- if (!job || !job->data) {
- DRM_ERROR("job is null\n");
- return;
- }
- sched_job = (struct amdgpu_cs_parser *)job->data;
- schedule_work(&sched_job->job_work);
+ mutex_unlock(&job->job_lock);
+ fence_put(&job->base.s_fence->base);
+ kfree(job);
+ return fence ? &fence->base : NULL;
}
struct amd_sched_backend_ops amdgpu_sched_ops = {
- .prepare_job = amdgpu_sched_prepare_job,
+ .dependency = amdgpu_sched_dependency,
.run_job = amdgpu_sched_run_job,
- .process_job = amdgpu_sched_process_job
};
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ibs,
unsigned num_ibs,
- int (*free_job)(struct amdgpu_cs_parser *),
+ int (*free_job)(struct amdgpu_job *),
void *owner,
struct fence **f)
{
int r = 0;
if (amdgpu_enable_scheduler) {
- struct amdgpu_cs_parser *sched_job =
- amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx,
- ibs, num_ibs);
- if(!sched_job) {
+ struct amdgpu_job *job =
+ kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
+ if (!job)
return -ENOMEM;
- }
- sched_job->free_job = free_job;
- mutex_lock(&sched_job->job_lock);
- r = amd_sched_push_job(ring->scheduler,
- &adev->kernel_ctx.rings[ring->idx].entity,
- sched_job, &sched_job->s_fence);
+ job->base.sched = &ring->sched;
+ job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
+ job->adev = adev;
+ job->ibs = ibs;
+ job->num_ibs = num_ibs;
+ job->base.owner = owner;
+ mutex_init(&job->job_lock);
+ job->free_job = free_job;
+ mutex_lock(&job->job_lock);
+ r = amd_sched_entity_push_job(&job->base);
if (r) {
- mutex_unlock(&sched_job->job_lock);
- kfree(sched_job);
+ mutex_unlock(&job->job_lock);
+ kfree(job);
return r;
}
- ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq;
- *f = fence_get(&sched_job->s_fence->base);
- mutex_unlock(&sched_job->job_lock);
+ *f = fence_get(&job->base.s_fence->base);
+ mutex_unlock(&job->job_lock);
} else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r)
return r;
*f = fence_get(&ibs[num_ibs - 1].fence->base);
}
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index d6d41a42ab65..ff3ca52ec6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
if (semaphore == NULL || *semaphore == NULL) {
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 7cb711fc1ee2..4921de15b451 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -32,6 +32,11 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
+struct amdgpu_sync_entry {
+ struct hlist_node node;
+ struct fence *fence;
+};
+
/**
* amdgpu_sync_create - zero init sync object
*
@@ -49,9 +54,39 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
sync->sync_to[i] = NULL;
+ hash_init(sync->fences);
sync->last_vm_update = NULL;
}
+static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
+{
+ struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
+ struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+ if (a_fence)
+ return a_fence->ring->adev == adev;
+
+ if (s_fence) {
+ struct amdgpu_ring *ring;
+
+ ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
+ return ring->adev == adev;
+ }
+
+ return false;
+}
+
+static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
+{
+ struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
+ struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+ if (s_fence)
+ return s_fence->owner == owner;
+ if (a_fence)
+ return a_fence->owner == owner;
+ return false;
+}
+
/**
* amdgpu_sync_fence - remember to sync to this fence
*
@@ -62,31 +97,69 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f)
{
+ struct amdgpu_sync_entry *e;
struct amdgpu_fence *fence;
struct amdgpu_fence *other;
+ struct fence *tmp, *later;
if (!f)
return 0;
+ if (amdgpu_sync_same_dev(adev, f) &&
+ amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) {
+ if (sync->last_vm_update) {
+ tmp = sync->last_vm_update;
+ BUG_ON(f->context != tmp->context);
+ later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
+ sync->last_vm_update = fence_get(later);
+ fence_put(tmp);
+ } else
+ sync->last_vm_update = fence_get(f);
+ }
+
fence = to_amdgpu_fence(f);
- if (!fence || fence->ring->adev != adev)
- return fence_wait(f, true);
+ if (!fence || fence->ring->adev != adev) {
+ hash_for_each_possible(sync->fences, e, node, f->context) {
+ struct fence *new;
+ if (unlikely(e->fence->context != f->context))
+ continue;
+ new = fence_get(fence_later(e->fence, f));
+ if (new) {
+ fence_put(e->fence);
+ e->fence = new;
+ }
+ return 0;
+ }
+
+ e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ hash_add(sync->fences, &e->node, f->context);
+ e->fence = fence_get(f);
+ return 0;
+ }
other = sync->sync_to[fence->ring->idx];
sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
amdgpu_fence_later(fence, other));
amdgpu_fence_unref(&other);
- if (fence->owner == AMDGPU_FENCE_OWNER_VM) {
- other = sync->last_vm_update;
- sync->last_vm_update = amdgpu_fence_ref(
- amdgpu_fence_later(fence, other));
- amdgpu_fence_unref(&other);
- }
-
return 0;
}
+static void *amdgpu_sync_get_owner(struct fence *f)
+{
+ struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
+ struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+ if (s_fence)
+ return s_fence->owner;
+ else if (a_fence)
+ return a_fence->owner;
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
+}
+
/**
* amdgpu_sync_resv - use the semaphores to sync to a reservation object
*
@@ -103,7 +176,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
{
struct reservation_object_list *flist;
struct fence *f;
- struct amdgpu_fence *fence;
+ void *fence_owner;
unsigned i;
int r = 0;
@@ -121,22 +194,22 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
- fence = f ? to_amdgpu_fence(f) : NULL;
- if (fence && fence->ring->adev == adev) {
+ if (amdgpu_sync_same_dev(adev, f)) {
/* VM updates are only interesting
* for other VM updates and moves.
*/
+ fence_owner = amdgpu_sync_get_owner(f);
if ((owner != AMDGPU_FENCE_OWNER_MOVE) &&
- (fence->owner != AMDGPU_FENCE_OWNER_MOVE) &&
+ (fence_owner != AMDGPU_FENCE_OWNER_MOVE) &&
((owner == AMDGPU_FENCE_OWNER_VM) !=
- (fence->owner == AMDGPU_FENCE_OWNER_VM)))
+ (fence_owner == AMDGPU_FENCE_OWNER_VM)))
continue;
/* Ignore fence from the same owner as
* long as it isn't undefined.
*/
if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
- fence->owner == owner)
+ fence_owner == owner)
continue;
}
@@ -147,6 +220,60 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
return r;
}
+struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct fence *f;
+ int i;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+
+ f = e->fence;
+
+ hash_del(&e->node);
+ kfree(e);
+
+ if (!fence_is_signaled(f))
+ return f;
+
+ fence_put(f);
+ }
+ return NULL;
+}
+
+int amdgpu_sync_wait(struct amdgpu_sync *sync)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ r = fence_wait(e->fence, false);
+ if (r)
+ return r;
+
+ hash_del(&e->node);
+ fence_put(e->fence);
+ kfree(e);
+ }
+
+ if (amdgpu_enable_semaphores)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_fence *fence = sync->sync_to[i];
+ if (!fence)
+ continue;
+
+ r = fence_wait(&fence->base, false);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_sync_rings - sync ring to all registered fences
*
@@ -178,7 +305,8 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
return -EINVAL;
}
- if (amdgpu_enable_scheduler || (count >= AMDGPU_NUM_SYNCS)) {
+ if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores ||
+ (count >= AMDGPU_NUM_SYNCS)) {
/* not enough room, wait manually */
r = fence_wait(&fence->base, false);
if (r)
@@ -234,15 +362,23 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
*/
void amdgpu_sync_free(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
unsigned i;
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ hash_del(&e->node);
+ fence_put(e->fence);
+ kfree(e);
+ }
+
for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
amdgpu_fence_unref(&sync->sync_to[i]);
- amdgpu_fence_unref(&sync->last_vm_update);
+ fence_put(sync->last_vm_update);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 962dd5552137..4865615e9c06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -59,8 +59,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_cleanup;
}
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0,
- NULL, &vram_obj);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ NULL, NULL, &vram_obj);
if (r) {
DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup;
@@ -77,10 +78,11 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
void *gtt_map, *vram_map;
void **gtt_start, **gtt_end;
void **vram_start, **vram_end;
- struct amdgpu_fence *fence = NULL;
+ struct fence *fence = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
+ NULL, gtt_obj + i);
if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_lclean;
@@ -116,13 +118,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin;
}
- r = fence_wait(&fence->base, false);
+ r = fence_wait(fence, false);
if (r) {
DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
goto out_lclean_unpin;
}
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
r = amdgpu_bo_kmap(vram_obj, &vram_map);
if (r) {
@@ -161,13 +163,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin;
}
- r = fence_wait(&fence->base, false);
+ r = fence_wait(fence, false);
if (r) {
DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
goto out_lclean_unpin;
}
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
@@ -214,7 +216,7 @@ out_lclean:
amdgpu_bo_unref(&gtt_obj[i]);
}
if (fence)
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dd3415d2e45d..364cbe975332 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
struct amdgpu_device *adev;
struct amdgpu_ring *ring;
uint64_t old_start, new_start;
- struct amdgpu_fence *fence;
+ struct fence *fence;
int r;
adev = amdgpu_get_adev(bo->bdev);
@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
new_mem->num_pages * PAGE_SIZE, /* bytes */
bo->resv, &fence);
/* FIXME: handle copy error */
- r = ttm_bo_move_accel_cleanup(bo, &fence->base,
+ r = ttm_bo_move_accel_cleanup(bo, fence,
evict, no_wait_gpu, new_mem);
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
return r;
}
@@ -859,8 +859,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0,
- NULL, &adev->stollen_vga_memory);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, &adev->stollen_vga_memory);
if (r) {
return r;
}
@@ -987,46 +988,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
uint64_t dst_offset,
uint32_t byte_count,
struct reservation_object *resv,
- struct amdgpu_fence **fence)
+ struct fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_sync sync;
uint32_t max_bytes;
unsigned num_loops, num_dw;
+ struct amdgpu_ib *ib;
unsigned i;
int r;
- /* sync other rings */
- amdgpu_sync_create(&sync);
- if (resv) {
- r = amdgpu_sync_resv(adev, &sync, resv, false);
- if (r) {
- DRM_ERROR("sync failed (%d).\n", r);
- amdgpu_sync_free(adev, &sync, NULL);
- return r;
- }
- }
-
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
- /* for fence and sync */
- num_dw += 64 + AMDGPU_NUM_SYNCS * 8;
+ /* for IB padding */
+ while (num_dw & 0x7)
+ num_dw++;
+
+ ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+ if (!ib)
+ return -ENOMEM;
- r = amdgpu_ring_lock(ring, num_dw);
+ r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
if (r) {
- DRM_ERROR("ring lock failed (%d).\n", r);
- amdgpu_sync_free(adev, &sync, NULL);
+ kfree(ib);
return r;
}
- amdgpu_sync_rings(&sync, ring);
+ ib->length_dw = 0;
+
+ if (resv) {
+ r = amdgpu_sync_resv(adev, &ib->sync, resv,
+ AMDGPU_FENCE_OWNER_UNDEFINED);
+ if (r) {
+ DRM_ERROR("sync failed (%d).\n", r);
+ goto error_free;
+ }
+ }
for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
- amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset,
+ amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
cur_size_in_bytes);
src_offset += cur_size_in_bytes;
@@ -1034,17 +1037,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
byte_count -= cur_size_in_bytes;
}
- r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence);
- if (r) {
- amdgpu_ring_unlock_undo(ring);
- amdgpu_sync_free(adev, &sync, NULL);
- return r;
- }
-
- amdgpu_ring_unlock_commit(ring);
- amdgpu_sync_free(adev, &sync, *fence);
+ amdgpu_vm_pad_ib(adev, ib);
+ WARN_ON(ib->length_dw > num_dw);
+ r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+ &amdgpu_vm_free_job,
+ AMDGPU_FENCE_OWNER_MOVE,
+ fence);
+ if (r)
+ goto error_free;
+ if (!amdgpu_enable_scheduler) {
+ amdgpu_ib_free(adev, ib);
+ kfree(ib);
+ }
return 0;
+error_free:
+ amdgpu_ib_free(adev, ib);
+ kfree(ib);
+ return r;
}
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 482e66797ae6..5cc95f1a7dab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -247,7 +247,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
const struct common_firmware_header *header = NULL;
err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL, bo);
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, bo);
if (err) {
dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
err = -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 68369cf1e318..d0312364d950 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -154,7 +154,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+ AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE;
r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &adev->uvd.vcpu_bo);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, &adev->uvd.vcpu_bo);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
@@ -221,31 +223,32 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
- unsigned size;
- void *ptr;
- const struct common_firmware_header *hdr;
- int i;
+ struct amdgpu_ring *ring = &adev->uvd.ring;
+ int i, r;
if (adev->uvd.vcpu_bo == NULL)
return 0;
- for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
- if (atomic_read(&adev->uvd.handles[i]))
- break;
-
- if (i == AMDGPU_MAX_UVD_HANDLES)
- return 0;
+ for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
+ uint32_t handle = atomic_read(&adev->uvd.handles[i]);
+ if (handle != 0) {
+ struct fence *fence;
- hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ amdgpu_uvd_note_usage(adev);
- size = amdgpu_bo_size(adev->uvd.vcpu_bo);
- size -= le32_to_cpu(hdr->ucode_size_bytes);
+ r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
+ if (r) {
+ DRM_ERROR("Error destroying UVD (%d)!\n", r);
+ continue;
+ }
- ptr = adev->uvd.cpu_addr;
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ fence_wait(fence, false);
+ fence_put(fence);
- adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
- memcpy(adev->uvd.saved_bo, ptr, size);
+ adev->uvd.filp[i] = NULL;
+ atomic_set(&adev->uvd.handles[i], 0);
+ }
+ }
return 0;
}
@@ -270,12 +273,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
ptr = adev->uvd.cpu_addr;
ptr += le32_to_cpu(hdr->ucode_size_bytes);
- if (adev->uvd.saved_bo != NULL) {
- memcpy(ptr, adev->uvd.saved_bo, size);
- kfree(adev->uvd.saved_bo);
- adev->uvd.saved_bo = NULL;
- } else
- memset(ptr, 0, size);
+ memset(ptr, 0, size);
return 0;
}
@@ -545,46 +543,60 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
return -EINVAL;
}
- if (msg_type == 1) {
+ switch (msg_type) {
+ case 0:
+ /* it's a create msg, calc image size (width * height) */
+ amdgpu_bo_kunmap(bo);
+
+ /* try to alloc a new handle */
+ for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+ DRM_ERROR("Handle 0x%x already in use!\n", handle);
+ return -EINVAL;
+ }
+
+ if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
+ adev->uvd.filp[i] = ctx->parser->filp;
+ return 0;
+ }
+ }
+
+ DRM_ERROR("No more free UVD handles!\n");
+ return -EINVAL;
+
+ case 1:
/* it's a decode msg, calc buffer sizes */
r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes);
amdgpu_bo_kunmap(bo);
if (r)
return r;
- } else if (msg_type == 2) {
+ /* validate the handle */
+ for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+ if (adev->uvd.filp[i] != ctx->parser->filp) {
+ DRM_ERROR("UVD handle collision detected!\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+ }
+
+ DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+ return -ENOENT;
+
+ case 2:
/* it's a destroy msg, free the handle */
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
amdgpu_bo_kunmap(bo);
return 0;
- } else {
- /* it's a create msg */
- amdgpu_bo_kunmap(bo);
-
- if (msg_type != 0) {
- DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
- return -EINVAL;
- }
-
- /* it's a create msg, no special handling needed */
- }
-
- /* create or decode, validate the handle */
- for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
- if (atomic_read(&adev->uvd.handles[i]) == handle)
- return 0;
- }
- /* handle not found try to alloc a new one */
- for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
- if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
- adev->uvd.filp[i] = ctx->parser->filp;
- return 0;
- }
+ default:
+ DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+ return -EINVAL;
}
-
- DRM_ERROR("No more free UVD handles!\n");
+ BUG();
return -EINVAL;
}
@@ -807,10 +819,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
}
static int amdgpu_uvd_free_job(
- struct amdgpu_cs_parser *sched_job)
+ struct amdgpu_job *job)
{
- amdgpu_ib_free(sched_job->adev, sched_job->ibs);
- kfree(sched_job->ibs);
+ amdgpu_ib_free(job->adev, job->ibs);
+ kfree(job->ibs);
return 0;
}
@@ -905,7 +917,9 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
int r, i;
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &bo);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, &bo);
if (r)
return r;
@@ -952,7 +966,9 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
int r, i;
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &bo);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, &bo);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 33ee6ae28f37..74f2038ac747 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -141,7 +141,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
/* allocate firmware, stack and heap BO */
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &adev->vce.vcpu_bo);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, &adev->vce.vcpu_bo);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
return r;
@@ -340,10 +342,10 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
static int amdgpu_vce_free_job(
- struct amdgpu_cs_parser *sched_job)
+ struct amdgpu_job *job)
{
- amdgpu_ib_free(sched_job->adev, sched_job->ibs);
- kfree(sched_job->ibs);
+ amdgpu_ib_free(job->adev, job->ibs);
+ kfree(job->ibs);
return 0;
}
@@ -836,6 +838,10 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
struct fence *fence = NULL;
int r;
+ /* skip vce ring1 ib test for now, since it's not reliable */
+ if (ring == &ring->adev->vce.ring[1])
+ return 0;
+
r = amdgpu_vce_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a78a206e176e..1e14531353e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -200,19 +200,29 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
*/
void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm,
- struct amdgpu_fence *updates)
+ struct fence *updates)
{
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
- struct amdgpu_fence *flushed_updates = vm_id->flushed_updates;
+ struct fence *flushed_updates = vm_id->flushed_updates;
+ bool is_earlier = false;
+
+ if (flushed_updates && updates) {
+ BUG_ON(flushed_updates->context != updates->context);
+ is_earlier = (updates->seqno - flushed_updates->seqno <=
+ INT_MAX) ? true : false;
+ }
if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
- (updates && amdgpu_fence_is_earlier(flushed_updates, updates))) {
+ is_earlier) {
trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
- vm_id->flushed_updates = amdgpu_fence_ref(
- amdgpu_fence_later(flushed_updates, updates));
- amdgpu_fence_unref(&flushed_updates);
+ if (is_earlier) {
+ vm_id->flushed_updates = fence_get(updates);
+ fence_put(flushed_updates);
+ }
+ if (!flushed_updates)
+ vm_id->flushed_updates = fence_get(updates);
vm_id->pd_gpu_addr = pd_addr;
amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
}
@@ -306,13 +316,12 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
}
}
-static int amdgpu_vm_free_job(
- struct amdgpu_cs_parser *sched_job)
+int amdgpu_vm_free_job(struct amdgpu_job *job)
{
int i;
- for (i = 0; i < sched_job->num_ibs; i++)
- amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
- kfree(sched_job->ibs);
+ for (i = 0; i < job->num_ibs; i++)
+ amdgpu_ib_free(job->adev, &job->ibs[i]);
+ kfree(job->ibs);
return 0;
}
@@ -618,9 +627,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
{
uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
uint64_t last_pte = ~0, last_dst = ~0;
+ void *owner = AMDGPU_FENCE_OWNER_VM;
unsigned count = 0;
uint64_t addr;
+ /* sync to everything on unmapping */
+ if (!(flags & AMDGPU_PTE_VALID))
+ owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
/* walk over the address space and update the page tables */
for (addr = start; addr < end; ) {
uint64_t pt_idx = addr >> amdgpu_vm_block_size;
@@ -629,8 +643,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
uint64_t pte;
int r;
- amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv,
- AMDGPU_FENCE_OWNER_VM);
+ amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
r = reservation_object_reserve_shared(pt->tbo.resv);
if (r)
return r;
@@ -673,31 +686,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
}
/**
- * amdgpu_vm_fence_pts - fence page tables after an update
- *
- * @vm: requested vm
- * @start: start of GPU address range
- * @end: end of GPU address range
- * @fence: fence to use
- *
- * Fence the page tables in the range @start - @end (cayman+).
- *
- * Global and local mutex must be locked!
- */
-static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm,
- uint64_t start, uint64_t end,
- struct fence *fence)
-{
- unsigned i;
-
- start >>= amdgpu_vm_block_size;
- end >>= amdgpu_vm_block_size;
-
- for (i = start; i <= end; ++i)
- amdgpu_bo_fence(vm->page_tables[i].bo, fence, true);
-}
-
-/**
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
*
* @adev: amdgpu_device pointer
@@ -781,17 +769,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
ib->length_dw = 0;
- if (!(flags & AMDGPU_PTE_VALID)) {
- unsigned i;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_fence *f = vm->ids[i].last_id_use;
- r = amdgpu_sync_fence(adev, &ib->sync, &f->base);
- if (r)
- return r;
- }
- }
-
r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
mapping->it.last + 1, addr + mapping->offset,
flags, gtt_flags);
@@ -811,8 +788,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_free;
- amdgpu_vm_fence_pts(vm, mapping->it.start,
- mapping->it.last + 1, f);
+ amdgpu_bo_fence(vm->page_directory, f, true);
if (fence) {
fence_put(*fence);
*fence = fence_get(f);
@@ -853,7 +829,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
int r;
if (mem) {
- addr = mem->start << PAGE_SHIFT;
+ addr = (u64)mem->start << PAGE_SHIFT;
if (mem->mem_type != TTM_PL_TT)
addr += adev->vm_manager.vram_base_offset;
} else {
@@ -1087,6 +1063,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* walk over the address space and allocate the page tables */
for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
+ struct reservation_object *resv = vm->page_directory->tbo.resv;
struct amdgpu_bo *pt;
if (vm->page_tables[pt_idx].bo)
@@ -1095,9 +1072,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* drop mutex to allocate and clear page table */
mutex_unlock(&vm->mutex);
+ ww_mutex_lock(&resv->lock, NULL);
r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
AMDGPU_GPU_PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &pt);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+ NULL, resv, &pt);
+ ww_mutex_unlock(&resv->lock);
if (r)
goto error_free;
@@ -1297,8 +1278,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->page_directory_fence = NULL;
r = amdgpu_bo_create(adev, pd_size, align, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0,
- NULL, &vm->page_directory);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+ NULL, NULL, &vm->page_directory);
if (r)
return r;
@@ -1347,7 +1329,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
fence_put(vm->page_directory_fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- amdgpu_fence_unref(&vm->ids[i].flushed_updates);
+ fence_put(vm->ids[i].flushed_updates);
amdgpu_fence_unref(&vm->ids[i].last_id_use);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 9ba0a7d5bc8e..92b6acadfc52 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -139,7 +139,8 @@ amdgpu_atombios_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *m
tx_buf[0] = msg->address & 0xff;
tx_buf[1] = msg->address >> 8;
- tx_buf[2] = msg->request << 4;
+ tx_buf[2] = (msg->request << 4) |
+ ((msg->address >> 16) & 0xf);
tx_buf[3] = msg->size ? (msg->size - 1) : 0;
switch (msg->request & ~DP_AUX_I2C_MOT) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 2b4242b39b0a..9ea9de457da3 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->nop |
+ SDMA_NOP_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->nop);
+}
+
/**
* cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
*
@@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */
- while ((ring->wptr & 7) != 4)
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+ cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
+
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
@@ -501,6 +514,8 @@ static int cik_sdma_load_microcode(struct amdgpu_device *adev)
fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma[i].feature_version >= 20)
+ adev->sdma[i].burst_nop = true;
fw_data = (const __le32 *)
(adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
@@ -630,6 +645,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -814,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
*/
static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
{
- while (ib->length_dw & 0x7)
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+ SDMA_NOP_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
}
/**
@@ -1302,6 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.test_ring = cik_sdma_ring_test_ring,
.test_ib = cik_sdma_ring_test_ib,
.is_lockup = cik_sdma_ring_is_lockup,
+ .insert_nop = cik_sdma_ring_insert_nop,
};
static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1338,18 +1366,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
-static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring,
+static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
- amdgpu_ring_write(ring, byte_count);
- amdgpu_ring_write(ring, 0); /* src/dst endian swap */
- amdgpu_ring_write(ring, lower_32_bits(src_offset));
- amdgpu_ring_write(ring, upper_32_bits(src_offset));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
@@ -1362,16 +1390,16 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring,
*
* Fill GPU buffers using the DMA engine (CIK).
*/
-static void cik_sdma_emit_fill_buffer(struct amdgpu_ring *ring,
+static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib,
uint32_t src_data,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
- amdgpu_ring_write(ring, src_data);
- amdgpu_ring_write(ring, byte_count);
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
}
static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index a3e3dfaa01a4..7f6d457f250a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -487,6 +487,7 @@
(((op) & 0xFF) << 0))
/* sDMA opcodes */
#define SDMA_OPCODE_NOP 0
+# define SDMA_NOP_COUNT(x) (((x) & 0x3FFF) << 16)
#define SDMA_OPCODE_COPY 1
# define SDMA_COPY_SUB_OPCODE_LINEAR 0
# define SDMA_COPY_SUB_OPCODE_TILED 1
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index ace870afc7d4..44fa96ad4709 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -1596,9 +1596,9 @@ static int cz_dpm_update_low_memory_pstate(struct amdgpu_device *adev)
if (pi->sys_info.nb_dpm_enable) {
if (ps->force_high)
- cz_dpm_nbdpm_lm_pstate_enable(adev, true);
- else
cz_dpm_nbdpm_lm_pstate_enable(adev, false);
+ else
+ cz_dpm_nbdpm_lm_pstate_enable(adev, true);
}
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smc.c b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
index a72ffc7d6c26..e33180d3314a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
@@ -814,7 +814,8 @@ int cz_smu_init(struct amdgpu_device *adev)
* 3. map kernel virtual address
*/
ret = amdgpu_bo_create(adev, priv->toc_buffer.data_size, PAGE_SIZE,
- true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, toc_buf);
+ true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+ toc_buf);
if (ret) {
dev_err(adev->dev, "(%d) SMC TOC buffer allocation failed\n", ret);
@@ -822,7 +823,8 @@ int cz_smu_init(struct amdgpu_device *adev)
}
ret = amdgpu_bo_create(adev, priv->smu_buffer.data_size, PAGE_SIZE,
- true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, smu_buf);
+ true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+ smu_buf);
if (ret) {
dev_err(adev->dev, "(%d) SMC Internal buffer allocation failed\n", ret);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 4b255ac3043c..e4d101b1252a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1353,7 +1353,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
/* restore original selection */
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 70eee807421f..6411e8244671 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1329,7 +1329,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
/* restore original selection */
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
index 493c8c9c7faa..bda1249eb871 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
@@ -762,7 +762,9 @@ int fiji_smu_init(struct amdgpu_device *adev)
/* Allocate FW image data structure and header buffer */
ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE,
- true, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, toc_buf);
+ true, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, toc_buf);
if (ret) {
DRM_ERROR("Failed to allocate memory for TOC buffer\n");
return -ENOMEM;
@@ -770,7 +772,9 @@ int fiji_smu_init(struct amdgpu_device *adev)
/* Allocate buffer for SMU internal buffer */
ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE,
- true, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, smu_buf);
+ true, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, smu_buf);
if (ret) {
DRM_ERROR("Failed to allocate memory for SMU internal buffer\n");
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 9b0cab413677..e992bf2ff66c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -3205,7 +3206,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev,
adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->gfx.mec.hpd_eop_obj);
if (r) {
dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
@@ -3372,7 +3373,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev,
sizeof(struct bonaire_mqd),
PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&ring->mqd_obj);
if (r) {
dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
@@ -3609,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
- /* instruct DE to set a magic number */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(5)));
- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, 1);
-
- /* let CE wait till condition satisfied */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
- amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
- WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
- WAIT_REG_MEM_FUNCTION(3) | /* == */
- WAIT_REG_MEM_ENGINE(2))); /* ce */
- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, 1);
- amdgpu_ring_write(ring, 0xffffffff);
- amdgpu_ring_write(ring, 4); /* poll interval */
-
- /* instruct CE to reset wb of ce_sync to zero */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
- WRITE_DATA_DST_SEL(5) |
- WR_CONFIRM));
- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, 0);
-}
-
/*
* vm
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3662,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ if (usepfp) {
+ /* sync CE with ME to prevent CE from fetching CEIB before the context switch is done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -3702,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0x0);
/* synce CE with ME to prevent CE fetch CEIB before context switch done */
- gfx_v7_0_ce_sync_me(ring);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
}
}
@@ -3785,7 +3761,10 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
/* save restore block */
if (adev->gfx.rlc.save_restore_obj == NULL) {
r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &adev->gfx.rlc.save_restore_obj);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL,
+ &adev->gfx.rlc.save_restore_obj);
if (r) {
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
return r;
@@ -3826,7 +3805,10 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
if (adev->gfx.rlc.clear_state_obj == NULL) {
r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &adev->gfx.rlc.clear_state_obj);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL,
+ &adev->gfx.rlc.clear_state_obj);
if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
@@ -3863,7 +3845,10 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
if (adev->gfx.rlc.cp_table_size) {
if (adev->gfx.rlc.cp_table_obj == NULL) {
r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &adev->gfx.rlc.cp_table_obj);
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL,
+ &adev->gfx.rlc.cp_table_obj);
if (r) {
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
@@ -4795,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
- r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
- if (r) {
- DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
- return r;
- }
-
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
ring->ring_obj = NULL;
@@ -4844,21 +4823,21 @@ static int gfx_v7_0_sw_init(void *handle)
r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GDS, 0,
- NULL, &adev->gds.gds_gfx_bo);
+ NULL, NULL, &adev->gds.gds_gfx_bo);
if (r)
return r;
r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GWS, 0,
- NULL, &adev->gds.gws_gfx_bo);
+ NULL, NULL, &adev->gds.gws_gfx_bo);
if (r)
return r;
r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_OA, 0,
- NULL, &adev->gds.oa_gfx_bo);
+ NULL, NULL, &adev->gds.oa_gfx_bo);
if (r)
return r;
@@ -4879,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
gfx_v7_0_cp_compute_fini(adev);
gfx_v7_0_rlc_fini(adev);
gfx_v7_0_mec_fini(adev);
@@ -5597,6 +5574,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.is_lockup = gfx_v7_0_ring_is_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5613,6 +5591,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.is_lockup = gfx_v7_0_ring_is_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4b68e6306f40..cb4f68f53f24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -867,7 +868,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev,
adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->gfx.mec.hpd_eop_obj);
if (r) {
dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
@@ -939,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
}
- r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
- if (r) {
- DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
- return r;
- }
-
/* set up the gfx ring */
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
@@ -994,21 +989,21 @@ static int gfx_v8_0_sw_init(void *handle)
/* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GDS, 0,
+ AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
NULL, &adev->gds.gds_gfx_bo);
if (r)
return r;
r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GWS, 0,
+ AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
NULL, &adev->gds.gws_gfx_bo);
if (r)
return r;
r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_OA, 0,
+ AMDGPU_GEM_DOMAIN_OA, 0, NULL,
NULL, &adev->gds.oa_gfx_bo);
if (r)
return r;
@@ -1032,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
gfx_v8_0_mec_fini(adev);
return 0;
@@ -2004,7 +1997,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
}
/**
- * gmc_v8_0_init_compute_vmid - gart enable
+ * gfx_v8_0_init_compute_vmid - gart enable
*
* @rdev: amdgpu_device pointer
*
@@ -2014,7 +2007,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
#define DEFAULT_SH_MEM_BASES (0x6000)
#define FIRST_COMPUTE_VMID (8)
#define LAST_COMPUTE_VMID (16)
-static void gmc_v8_0_init_compute_vmid(struct amdgpu_device *adev)
+static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
uint32_t sh_mem_config;
@@ -2281,7 +2274,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- gmc_v8_0_init_compute_vmid(adev);
+ gfx_v8_0_init_compute_vmid(adev);
mutex_lock(&adev->grbm_idx_mutex);
/*
@@ -3105,7 +3098,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
sizeof(struct vi_mqd),
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
- &ring->mqd_obj);
+ NULL, &ring->mqd_obj);
if (r) {
dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
return r;
@@ -3239,7 +3232,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
/* enable the doorbell if requested */
if (use_doorbell) {
- if (adev->asic_type == CHIP_CARRIZO) {
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_FIJI)) {
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
AMDGPU_DOORBELL_KIQ << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
@@ -3963,6 +3957,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
+
}
/**
@@ -4003,49 +3998,34 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
return true;
}
-static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring)
+static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
{
- struct amdgpu_device *adev = ring->adev;
- u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
- /* instruct DE to set a magic number */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(5)));
- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, 1);
+ int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
+ uint64_t addr = ring->fence_drv.gpu_addr;
- /* let CE wait till condition satisfied */
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
- amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
- WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
- WAIT_REG_MEM_FUNCTION(3) | /* == */
- WAIT_REG_MEM_ENGINE(2))); /* ce */
- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, 1);
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+ WAIT_REG_MEM_FUNCTION(3))); /* equal */
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq);
amdgpu_ring_write(ring, 0xffffffff);
amdgpu_ring_write(ring, 4); /* poll interval */
- /* instruct CE to reset wb of ce_sync to zero */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
- WRITE_DATA_DST_SEL(5) |
- WR_CONFIRM));
- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, 0);
-}
-
-static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vm_id, uint64_t pd_addr)
-{
- int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ if (usepfp) {
+ /* sync CE with ME to prevent CE from fetching CEIB before the context switch is done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
- WRITE_DATA_DST_SEL(0)));
+ WRITE_DATA_DST_SEL(0)) |
+ WR_CONFIRM);
if (vm_id < 8) {
amdgpu_ring_write(ring,
(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
@@ -4081,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* sync PFP to ME, otherwise we might get invalid PFP reads */
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
-
- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
- gfx_v8_0_ce_sync_me(ring);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
}
}
@@ -4377,6 +4358,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.is_lockup = gfx_v8_0_ring_is_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -4393,6 +4375,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.is_lockup = gfx_v8_0_ring_is_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 10218828face..774528ab8704 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -523,17 +523,11 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
tmp = RREG32(mmVM_CONTEXT1_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
amdgpu_vm_block_size - 9);
@@ -852,6 +846,13 @@ static int gmc_v7_0_early_init(void *handle)
return 0;
}
+static int gmc_v7_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+}
+
static int gmc_v7_0_sw_init(void *handle)
{
int r;
@@ -976,6 +977,7 @@ static int gmc_v7_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
gmc_v7_0_gart_disable(adev);
return 0;
@@ -1301,7 +1303,7 @@ static int gmc_v7_0_set_powergating_state(void *handle,
const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
.early_init = gmc_v7_0_early_init,
- .late_init = NULL,
+ .late_init = gmc_v7_0_late_init,
.sw_init = gmc_v7_0_sw_init,
.sw_fini = gmc_v7_0_sw_fini,
.hw_init = gmc_v7_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 78109b750d29..9a07742620d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -653,19 +653,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
tmp = RREG32(mmVM_CONTEXT1_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
amdgpu_vm_block_size - 9);
@@ -852,6 +845,13 @@ static int gmc_v8_0_early_init(void *handle)
return 0;
}
+static int gmc_v8_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+}
+
static int gmc_v8_0_sw_init(void *handle)
{
int r;
@@ -978,6 +978,7 @@ static int gmc_v8_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
gmc_v8_0_gart_disable(adev);
return 0;
@@ -1288,7 +1289,7 @@ static int gmc_v8_0_set_powergating_state(void *handle,
const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
.early_init = gmc_v8_0_early_init,
- .late_init = NULL,
+ .late_init = gmc_v8_0_late_init,
.sw_init = gmc_v8_0_sw_init,
.sw_fini = gmc_v8_0_sw_fini,
.hw_init = gmc_v8_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
index c723602c7b0c..ee6a041cb288 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
@@ -2163,5 +2163,10 @@
#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
#endif /* __ICELAND_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
index c6f1e2f12b5f..966d4b2ed9da 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
@@ -623,7 +623,9 @@ int iceland_smu_init(struct amdgpu_device *adev)
/* Allocate FW image data structure and header buffer */
ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE,
- true, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, toc_buf);
+ true, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, toc_buf);
if (ret) {
DRM_ERROR("Failed to allocate memory for TOC buffer\n");
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 9de8104eddeb..14e87234171a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -146,6 +146,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma[i].feature_version >= 20)
+ adev->sdma[i].burst_nop = true;
if (adev->firmware.smu_load) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -218,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
}
+static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+ int i;
+ /* Emit count NOP dwords; with burst_nop the first carries COUNT = count - 1. */
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->nop);
+}
+
/**
* sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
*
@@ -245,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */
- while ((ring->wptr & 7) != 2)
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+ sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
+
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
/* base must be 32 byte aligned */
@@ -689,6 +704,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -878,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
*/
static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
{
- while (ib->length_dw & 0x7)
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
/**
@@ -1313,6 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.test_ring = sdma_v2_4_ring_test_ring,
.test_ib = sdma_v2_4_ring_test_ib,
.is_lockup = sdma_v2_4_ring_is_lockup,
+ .insert_nop = sdma_v2_4_ring_insert_nop,
};
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1349,19 +1377,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
-static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
- amdgpu_ring_write(ring, byte_count);
- amdgpu_ring_write(ring, 0); /* src/dst endian swap */
- amdgpu_ring_write(ring, lower_32_bits(src_offset));
- amdgpu_ring_write(ring, upper_32_bits(src_offset));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
@@ -1374,16 +1402,16 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring,
*
* Fill GPU buffers using the DMA engine (VI).
*/
-static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
uint32_t src_data,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
- amdgpu_ring_write(ring, src_data);
- amdgpu_ring_write(ring, byte_count);
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
}
static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 029f3455f9f9..9bfe92df15f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -218,6 +218,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma[i].feature_version >= 20)
+ adev->sdma[i].burst_nop = true;
if (adev->firmware.smu_load) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -304,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
}
}
+static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+ int i;
+ /* Emit count NOP dwords; with burst_nop the first carries COUNT = count - 1. */
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->nop);
+}
+
/**
* sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
*
@@ -330,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */
- while ((ring->wptr & 7) != 2)
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+ sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
@@ -810,6 +824,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -998,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
*/
static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
{
- while (ib->length_dw & 0x7)
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
/**
@@ -1437,6 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.test_ring = sdma_v3_0_ring_test_ring,
.test_ib = sdma_v3_0_ring_test_ib,
.is_lockup = sdma_v3_0_ring_is_lockup,
+ .insert_nop = sdma_v3_0_ring_insert_nop,
};
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1473,19 +1500,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
-static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring,
+static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
- amdgpu_ring_write(ring, byte_count);
- amdgpu_ring_write(ring, 0); /* src/dst endian swap */
- amdgpu_ring_write(ring, lower_32_bits(src_offset));
- amdgpu_ring_write(ring, upper_32_bits(src_offset));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
@@ -1498,16 +1525,16 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring,
*
* Fill GPU buffers using the DMA engine (VI).
*/
-static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
uint32_t src_data,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
- amdgpu_ring_write(ring, src_data);
- amdgpu_ring_write(ring, byte_count);
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
}
static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
index 099b7b56113c..e5ebd084288d 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
@@ -2236,5 +2236,10 @@
#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
#endif /* __TONGA_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
index 5fc53a40c7ac..5421309c1862 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
@@ -761,7 +761,9 @@ int tonga_smu_init(struct amdgpu_device *adev)
/* Allocate FW image data structure and header buffer */
ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE,
- true, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, toc_buf);
+ true, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, toc_buf);
if (ret) {
DRM_ERROR("Failed to allocate memory for TOC buffer\n");
return -ENOMEM;
@@ -769,7 +771,9 @@ int tonga_smu_init(struct amdgpu_device *adev)
/* Allocate buffer for SMU internal buffer */
ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE,
- true, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, smu_buf);
+ true, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL, smu_buf);
if (ret) {
DRM_ERROR("Failed to allocate memory for SMU internal buffer\n");
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 9ac383bc6c1f..ed50dd725788 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -224,11 +224,11 @@ static int uvd_v4_2_suspend(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = uvd_v4_2_hw_fini(adev);
+ r = amdgpu_uvd_suspend(adev);
if (r)
return r;
- r = amdgpu_uvd_suspend(adev);
+ r = uvd_v4_2_hw_fini(adev);
if (r)
return r;
@@ -886,6 +886,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.test_ring = uvd_v4_2_ring_test_ring,
.test_ib = uvd_v4_2_ring_test_ib,
.is_lockup = amdgpu_ring_test_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index de4b3f57902d..9ad8b9906c0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -220,11 +220,11 @@ static int uvd_v5_0_suspend(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = uvd_v5_0_hw_fini(adev);
+ r = amdgpu_uvd_suspend(adev);
if (r)
return r;
- r = amdgpu_uvd_suspend(adev);
+ r = uvd_v5_0_hw_fini(adev);
if (r)
return r;
@@ -825,6 +825,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.test_ring = uvd_v5_0_ring_test_ring,
.test_ib = uvd_v5_0_ring_test_ib,
.is_lockup = amdgpu_ring_test_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 66c975870e97..7e9934fa4193 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -214,14 +214,16 @@ static int uvd_v6_0_suspend(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ /* Skip this for APU for now */
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_uvd_suspend(adev);
+ if (r)
+ return r;
+ }
r = uvd_v6_0_hw_fini(adev);
if (r)
return r;
- r = amdgpu_uvd_suspend(adev);
- if (r)
- return r;
-
return r;
}
@@ -230,10 +232,12 @@ static int uvd_v6_0_resume(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
- if (r)
- return r;
-
+ /* Skip this for APU for now */
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+ }
r = uvd_v6_0_hw_init(adev);
if (r)
return r;
@@ -805,6 +809,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = uvd_v6_0_ring_test_ib,
.is_lockup = amdgpu_ring_test_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 303d961d57bd..cd16df543f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -643,6 +643,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
.test_ring = amdgpu_vce_ring_test_ring,
.test_ib = amdgpu_vce_ring_test_ib,
.is_lockup = amdgpu_ring_test_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 4349658081ff..f0656dfb53f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -32,8 +32,8 @@
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
-#include "oss/oss_2_0_d.h"
-#include "oss/oss_2_0_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
@@ -426,17 +426,41 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
static bool vce_v3_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 mask = 0;
+ int idx;
- return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
+ for (idx = 0; idx < 2; ++idx) {
+ if (adev->vce.harvest_config & (1 << idx))
+ continue;
+
+ if (idx == 0)
+ mask |= SRBM_STATUS2__VCE0_BUSY_MASK;
+ else
+ mask |= SRBM_STATUS2__VCE1_BUSY_MASK;
+ }
+
+ return !(RREG32(mmSRBM_STATUS2) & mask);
}
static int vce_v3_0_wait_for_idle(void *handle)
{
unsigned i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 mask = 0;
+ int idx;
+
+ for (idx = 0; idx < 2; ++idx) {
+ if (adev->vce.harvest_config & (1 << idx))
+ continue;
+
+ if (idx == 0)
+ mask |= SRBM_STATUS2__VCE0_BUSY_MASK;
+ else
+ mask |= SRBM_STATUS2__VCE1_BUSY_MASK;
+ }
for (i = 0; i < adev->usec_timeout; i++) {
- if (!(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK))
+ if (!(RREG32(mmSRBM_STATUS2) & mask))
return 0;
}
return -ETIMEDOUT;
@@ -445,9 +469,21 @@ static int vce_v3_0_wait_for_idle(void *handle)
static int vce_v3_0_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 mask = 0;
+ int idx;
+
+ for (idx = 0; idx < 2; ++idx) {
+ if (adev->vce.harvest_config & (1 << idx))
+ continue;
- WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_VCE_MASK,
- ~SRBM_SOFT_RESET__SOFT_RESET_VCE_MASK);
+ if (idx == 0)
+ mask |= SRBM_SOFT_RESET__SOFT_RESET_VCE0_MASK;
+ else
+ mask |= SRBM_SOFT_RESET__SOFT_RESET_VCE1_MASK;
+ }
+ WREG32_P(mmSRBM_SOFT_RESET, mask,
+ ~(SRBM_SOFT_RESET__SOFT_RESET_VCE0_MASK |
+ SRBM_SOFT_RESET__SOFT_RESET_VCE1_MASK));
mdelay(5);
return vce_v3_0_start(adev);
@@ -608,6 +644,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
.test_ring = amdgpu_vce_ring_test_ring,
.test_ib = amdgpu_vce_ring_test_ib,
.is_lockup = amdgpu_ring_test_lockup,
+ .insert_nop = amdgpu_ring_insert_nop,
};
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 552d9e75ad1b..b55ceb14fdcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1400,7 +1400,8 @@ static int vi_common_early_init(void *handle)
case CHIP_CARRIZO:
adev->has_uvd = true;
adev->cg_flags = 0;
- adev->pg_flags = AMDGPU_PG_SUPPORT_UVD | AMDGPU_PG_SUPPORT_VCE;
+ /* Disable UVD pg */
+ adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
adev->external_rev_id = adev->rev_id + 0x1;
if (amdgpu_smc_load_fw && smc_enabled)
adev->firmware.smu_load = true;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index c991973019d0..c6a1b4cc6458 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -31,7 +31,7 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
-#include <uapi/asm-generic/mman-common.h>
+#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 35b987574633..2b655103ba79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -33,7 +33,7 @@
#include <linux/time.h>
#include "kfd_priv.h"
#include <linux/mm.h>
-#include <uapi/asm-generic/mman-common.h>
+#include <linux/mman.h>
#include <asm/processor.h>
/*
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
new file mode 100644
index 000000000000..144f50acc971
--- /dev/null
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -0,0 +1,41 @@
+#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _GPU_SCHED_TRACE_H
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu_sched
+#define TRACE_INCLUDE_FILE gpu_sched_trace
+/* Records a job's entity, scheduler name, queued job count and hw_rq_count. */
+TRACE_EVENT(amd_sched_job,
+ TP_PROTO(struct amd_sched_job *sched_job),
+ TP_ARGS(sched_job),
+ TP_STRUCT__entry(
+ __field(struct amd_sched_entity *, entity)
+ __field(const char *, name)
+ __field(u32, job_count)
+ __field(int, hw_job_count)
+ ),
+
+ TP_fast_assign(
+ __entry->entity = sched_job->s_entity;
+ __entry->name = sched_job->sched->name;
+ __entry->job_count = kfifo_len(
+ &sched_job->s_entity->job_queue) / sizeof(sched_job);
+ __entry->hw_job_count = atomic_read(
+ &sched_job->sched->hw_rq_count);
+ ),
+ TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d",
+ __entry->entity, __entry->name, __entry->job_count,
+ __entry->hw_job_count)
+);
+#endif /* _GPU_SCHED_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 265d3e2f63cc..3697eeeecf82 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -27,132 +27,79 @@
#include <drm/drmP.h>
#include "gpu_scheduler.h"
+#define CREATE_TRACE_POINTS
+#include "gpu_sched_trace.h"
+
+static struct amd_sched_job *
+amd_sched_entity_pop_job(struct amd_sched_entity *entity);
+static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+
/* Initialize a given run queue struct */
static void amd_sched_rq_init(struct amd_sched_rq *rq)
{
+ spin_lock_init(&rq->lock);
INIT_LIST_HEAD(&rq->entities);
- mutex_init(&rq->lock);
rq->current_entity = NULL;
}
static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
struct amd_sched_entity *entity)
{
- mutex_lock(&rq->lock);
+ spin_lock(&rq->lock);
list_add_tail(&entity->list, &rq->entities);
- mutex_unlock(&rq->lock);
+ spin_unlock(&rq->lock);
}
static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
struct amd_sched_entity *entity)
{
- mutex_lock(&rq->lock);
+ spin_lock(&rq->lock);
list_del_init(&entity->list);
if (rq->current_entity == entity)
rq->current_entity = NULL;
- mutex_unlock(&rq->lock);
+ spin_unlock(&rq->lock);
}
/**
- * Select next entity from a specified run queue with round robin policy.
- * It could return the same entity as current one if current is the only
- * available one in the queue. Return NULL if nothing available.
+ * Select next job from a specified run queue with round robin policy.
+ * Return NULL if nothing available.
*/
-static struct amd_sched_entity *
-amd_sched_rq_select_entity(struct amd_sched_rq *rq)
+static struct amd_sched_job *
+amd_sched_rq_select_job(struct amd_sched_rq *rq)
{
- struct amd_sched_entity *entity = rq->current_entity;
+ struct amd_sched_entity *entity;
+ struct amd_sched_job *sched_job;
+ spin_lock(&rq->lock);
+
+ entity = rq->current_entity;
if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) {
- if (!kfifo_is_empty(&entity->job_queue)) {
+ sched_job = amd_sched_entity_pop_job(entity);
+ if (sched_job) {
rq->current_entity = entity;
- return rq->current_entity;
+ spin_unlock(&rq->lock);
+ return sched_job;
}
}
}
list_for_each_entry(entity, &rq->entities, list) {
- if (!kfifo_is_empty(&entity->job_queue)) {
+ sched_job = amd_sched_entity_pop_job(entity);
+ if (sched_job) {
rq->current_entity = entity;
- return rq->current_entity;
+ spin_unlock(&rq->lock);
+ return sched_job;
}
if (entity == rq->current_entity)
break;
}
- return NULL;
-}
-
-/**
- * Note: This function should only been called inside scheduler main
- * function for thread safety, there is no other protection here.
- * return ture if scheduler has something ready to run.
- *
- * For active_hw_rq, there is only one producer(scheduler thread) and
- * one consumer(ISR). It should be safe to use this function in scheduler
- * main thread to decide whether to continue emit more IBs.
-*/
-static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
-{
- unsigned long flags;
- bool full;
-
- spin_lock_irqsave(&sched->queue_lock, flags);
- full = atomic64_read(&sched->hw_rq_count) <
- sched->hw_submission_limit ? true : false;
- spin_unlock_irqrestore(&sched->queue_lock, flags);
-
- return full;
-}
-
-/**
- * Select next entity from the kernel run queue, if not available,
- * return null.
-*/
-static struct amd_sched_entity *
-kernel_rq_select_context(struct amd_gpu_scheduler *sched)
-{
- struct amd_sched_entity *sched_entity;
- struct amd_sched_rq *rq = &sched->kernel_rq;
-
- mutex_lock(&rq->lock);
- sched_entity = amd_sched_rq_select_entity(rq);
- mutex_unlock(&rq->lock);
- return sched_entity;
-}
-
-/**
- * Select next entity containing real IB submissions
-*/
-static struct amd_sched_entity *
-select_context(struct amd_gpu_scheduler *sched)
-{
- struct amd_sched_entity *wake_entity = NULL;
- struct amd_sched_entity *tmp;
- struct amd_sched_rq *rq;
+ spin_unlock(&rq->lock);
- if (!is_scheduler_ready(sched))
- return NULL;
-
- /* Kernel run queue has higher priority than normal run queue*/
- tmp = kernel_rq_select_context(sched);
- if (tmp != NULL)
- goto exit;
-
- rq = &sched->sched_rq;
- mutex_lock(&rq->lock);
- tmp = amd_sched_rq_select_entity(rq);
- mutex_unlock(&rq->lock);
-exit:
- if (sched->current_entity && (sched->current_entity != tmp))
- wake_entity = sched->current_entity;
- sched->current_entity = tmp;
- if (wake_entity && wake_entity->need_wakeup)
- wake_up(&wake_entity->wait_queue);
- return tmp;
+ return NULL;
}
/**
@@ -171,34 +118,27 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_rq *rq,
uint32_t jobs)
{
- uint64_t seq_ring = 0;
- char name[20];
+ int r;
if (!(sched && entity && rq))
return -EINVAL;
memset(entity, 0, sizeof(struct amd_sched_entity));
- seq_ring = ((uint64_t)sched->ring_id) << 60;
- spin_lock_init(&entity->lock);
- entity->belongto_rq = rq;
- entity->scheduler = sched;
- init_waitqueue_head(&entity->wait_queue);
- init_waitqueue_head(&entity->wait_emit);
- entity->fence_context = fence_context_alloc(1);
- snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
- memcpy(entity->name, name, 20);
- entity->need_wakeup = false;
- if(kfifo_alloc(&entity->job_queue,
- jobs * sizeof(void *),
- GFP_KERNEL))
- return -EINVAL;
+ INIT_LIST_HEAD(&entity->list);
+ entity->rq = rq;
+ entity->sched = sched;
spin_lock_init(&entity->queue_lock);
- atomic64_set(&entity->last_queued_v_seq, seq_ring);
- atomic64_set(&entity->last_signaled_v_seq, seq_ring);
+ r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
+ if (r)
+ return r;
+
+ atomic_set(&entity->fence_seq, 0);
+ entity->fence_context = fence_context_alloc(1);
/* Add the entity to the run queue */
amd_sched_rq_add_entity(rq, entity);
+
return 0;
}
@@ -210,23 +150,24 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
*
* return true if entity is initialized, false otherwise
*/
-static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity)
+static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
+ struct amd_sched_entity *entity)
{
- return entity->scheduler == sched &&
- entity->belongto_rq != NULL;
+ return entity->sched == sched &&
+ entity->rq != NULL;
}
-static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity)
+/**
+ * Check if entity is idle
+ *
+ * @entity The pointer to a valid scheduler entity
+ *
+ * Return true if the entity has no unscheduled jobs.
+ */
+static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
{
- /**
- * Idle means no pending IBs, and the entity is not
- * currently being used.
- */
- barrier();
- if ((sched->current_entity != entity) &&
- kfifo_is_empty(&entity->job_queue))
+ rmb();
+ if (kfifo_is_empty(&entity->job_queue))
return true;
return false;
@@ -238,225 +179,247 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
* @sched Pointer to scheduler instance
* @entity The pointer to a valid scheduler entity
*
- * return 0 if succeed. negative error code on failure
+ * Cleanup and free the allocated resources.
*/
-int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity)
+void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
+ struct amd_sched_entity *entity)
{
- int r = 0;
- struct amd_sched_rq *rq = entity->belongto_rq;
+ struct amd_sched_rq *rq = entity->rq;
+
+ if (!amd_sched_entity_is_initialized(sched, entity))
+ return;
- if (!is_context_entity_initialized(sched, entity))
- return 0;
- entity->need_wakeup = true;
/**
* The client will not queue more IBs during this fini, consume existing
* queued IBs
*/
- r = wait_event_timeout(
- entity->wait_queue,
- is_context_entity_idle(sched, entity),
- msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
- ) ? 0 : -1;
-
- if (r) {
- if (entity->is_pending)
- DRM_INFO("Entity %p is in waiting state during fini,\
- all pending ibs will be canceled.\n",
- entity);
- }
+ wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
amd_sched_rq_remove_entity(rq, entity);
kfifo_free(&entity->job_queue);
- return r;
+}
+
+static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
+{
+ struct amd_sched_entity *entity =
+ container_of(cb, struct amd_sched_entity, cb);
+ entity->dependency = NULL;
+ fence_put(f);
+ amd_sched_wakeup(entity->sched);
+}
+
+static struct amd_sched_job *
+amd_sched_entity_pop_job(struct amd_sched_entity *entity)
+{
+ struct amd_gpu_scheduler *sched = entity->sched;
+ struct amd_sched_job *sched_job;
+
+ if (ACCESS_ONCE(entity->dependency))
+ return NULL;
+
+ if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
+ return NULL;
+
+ while ((entity->dependency = sched->ops->dependency(sched_job))) {
+
+ if (fence_add_callback(entity->dependency, &entity->cb,
+ amd_sched_entity_wakeup))
+ fence_put(entity->dependency);
+ else
+ return NULL;
+ }
+
+ return sched_job;
}
/**
- * Submit a normal job to the job queue
+ * Helper to submit a job to the job queue
*
- * @sched The pointer to the scheduler
- * @c_entity The pointer to amd_sched_entity
- * @job The pointer to job required to submit
- * return 0 if succeed. -1 if failed.
- * -2 indicate queue is full for this client, client should wait untill
- * scheduler consum some queued command.
- * -1 other fail.
-*/
-int amd_sched_push_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- void *data,
- struct amd_sched_fence **fence)
+ * @sched_job The pointer to job required to submit
+ *
+ * Returns true if we could submit the job.
+ */
+static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
{
- struct amd_sched_job *job;
+ struct amd_sched_entity *entity = sched_job->s_entity;
+ bool added, first = false;
+
+ spin_lock(&entity->queue_lock);
+ added = kfifo_in(&entity->job_queue, &sched_job,
+ sizeof(sched_job)) == sizeof(sched_job);
+
+ if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job))
+ first = true;
+
+ spin_unlock(&entity->queue_lock);
+
+ /* first job wakes up scheduler */
+ if (first)
+ amd_sched_wakeup(sched_job->sched);
+
+ return added;
+}
+
+/**
+ * Submit a job to the job queue
+ *
+ * @sched_job The pointer to job required to submit
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
+{
+ struct amd_sched_entity *entity = sched_job->s_entity;
+ struct amd_sched_fence *fence = amd_sched_fence_create(
+ entity, sched_job->owner);
if (!fence)
- return -EINVAL;
- job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
- if (!job)
return -ENOMEM;
- job->sched = sched;
- job->s_entity = c_entity;
- job->data = data;
- *fence = amd_sched_fence_create(c_entity);
- if ((*fence) == NULL) {
- kfree(job);
- return -EINVAL;
- }
- fence_get(&(*fence)->base);
- job->s_fence = *fence;
- while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
- &c_entity->queue_lock) != sizeof(void *)) {
- /**
- * Current context used up all its IB slots
- * wait here, or need to check whether GPU is hung
- */
- schedule();
- }
- /* first job wake up scheduler */
- if ((kfifo_len(&c_entity->job_queue) / sizeof(void *)) == 1)
- wake_up_interruptible(&sched->wait_queue);
+
+ fence_get(&fence->base);
+ sched_job->s_fence = fence;
+
+ wait_event(entity->sched->job_scheduled,
+ amd_sched_entity_in(sched_job));
+ trace_amd_sched_job(sched_job);
return 0;
}
+/**
+ * Return true if we can push more jobs to the hw.
+ */
+static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
+{
+ return atomic_read(&sched->hw_rq_count) <
+ sched->hw_submission_limit;
+}
+
+/**
+ * Wake up the scheduler when it is ready
+ */
+static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
+{
+ if (amd_sched_ready(sched))
+ wake_up_interruptible(&sched->wake_up_worker);
+}
+
+/**
+ * Select next to run
+*/
+static struct amd_sched_job *
+amd_sched_select_job(struct amd_gpu_scheduler *sched)
+{
+ struct amd_sched_job *sched_job;
+
+ if (!amd_sched_ready(sched))
+ return NULL;
+
+ /* Kernel run queue has higher priority than normal run queue*/
+ sched_job = amd_sched_rq_select_job(&sched->kernel_rq);
+ if (sched_job == NULL)
+ sched_job = amd_sched_rq_select_job(&sched->sched_rq);
+
+ return sched_job;
+}
+
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
{
- struct amd_sched_job *sched_job =
- container_of(cb, struct amd_sched_job, cb);
- struct amd_gpu_scheduler *sched;
- unsigned long flags;
-
- sched = sched_job->sched;
- atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
- sched_job->s_fence->v_seq);
- amd_sched_fence_signal(sched_job->s_fence);
- spin_lock_irqsave(&sched->queue_lock, flags);
- list_del(&sched_job->list);
- atomic64_dec(&sched->hw_rq_count);
- spin_unlock_irqrestore(&sched->queue_lock, flags);
-
- sched->ops->process_job(sched, sched_job);
- fence_put(&sched_job->s_fence->base);
- kfree(sched_job);
- wake_up_interruptible(&sched->wait_queue);
+ struct amd_sched_fence *s_fence =
+ container_of(cb, struct amd_sched_fence, cb);
+ struct amd_gpu_scheduler *sched = s_fence->sched;
+
+ atomic_dec(&sched->hw_rq_count);
+ amd_sched_fence_signal(s_fence);
+ fence_put(&s_fence->base);
+ wake_up_interruptible(&sched->wake_up_worker);
}
static int amd_sched_main(void *param)
{
- int r;
- struct amd_sched_job *job;
struct sched_param sparam = {.sched_priority = 1};
- struct amd_sched_entity *c_entity = NULL;
struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
+ int r, count;
sched_setscheduler(current, SCHED_FIFO, &sparam);
while (!kthread_should_stop()) {
+ struct amd_sched_entity *entity;
+ struct amd_sched_fence *s_fence;
+ struct amd_sched_job *sched_job;
struct fence *fence;
- wait_event_interruptible(sched->wait_queue,
- is_scheduler_ready(sched) &&
- (c_entity = select_context(sched)));
- r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
- if (r != sizeof(void *))
+ wait_event_interruptible(sched->wake_up_worker,
+ kthread_should_stop() ||
+ (sched_job = amd_sched_select_job(sched)));
+
+ if (!sched_job)
continue;
- r = sched->ops->prepare_job(sched, c_entity, job);
- if (!r) {
- unsigned long flags;
- spin_lock_irqsave(&sched->queue_lock, flags);
- list_add_tail(&job->list, &sched->active_hw_rq);
- atomic64_inc(&sched->hw_rq_count);
- spin_unlock_irqrestore(&sched->queue_lock, flags);
- }
- mutex_lock(&sched->sched_lock);
- fence = sched->ops->run_job(sched, c_entity, job);
+
+ entity = sched_job->s_entity;
+ s_fence = sched_job->s_fence;
+ atomic_inc(&sched->hw_rq_count);
+ fence = sched->ops->run_job(sched_job);
if (fence) {
- r = fence_add_callback(fence, &job->cb,
+ r = fence_add_callback(fence, &s_fence->cb,
amd_sched_process_job);
if (r == -ENOENT)
- amd_sched_process_job(fence, &job->cb);
+ amd_sched_process_job(fence, &s_fence->cb);
else if (r)
DRM_ERROR("fence add callback failed (%d)\n", r);
fence_put(fence);
+ } else {
+ DRM_ERROR("Failed to run job!\n");
+ amd_sched_process_job(NULL, &s_fence->cb);
}
- mutex_unlock(&sched->sched_lock);
+
+ count = kfifo_out(&entity->job_queue, &sched_job,
+ sizeof(sched_job));
+ WARN_ON(count != sizeof(sched_job));
+ wake_up(&sched->job_scheduled);
}
return 0;
}
/**
- * Create a gpu scheduler
+ * Init a gpu scheduler instance
*
- * @device The device context for this scheduler
- * @ops The backend operations for this scheduler.
- * @id The scheduler is per ring, here is ring id.
- * @granularity The minumum ms unit the scheduler will scheduled.
- * @preemption Indicate whether this ring support preemption, 0 is no.
+ * @sched The pointer to the scheduler
+ * @ops The backend operations for this scheduler.
+ * @hw_submissions Number of hw submissions to do.
+ * @name Name used for debugging
*
- * return the pointer to scheduler for success, otherwise return NULL
+ * Return 0 on success, otherwise error code.
*/
-struct amd_gpu_scheduler *amd_sched_create(void *device,
- struct amd_sched_backend_ops *ops,
- unsigned ring,
- unsigned granularity,
- unsigned preemption,
- unsigned hw_submission)
+int amd_sched_init(struct amd_gpu_scheduler *sched,
+ struct amd_sched_backend_ops *ops,
+ unsigned hw_submission, const char *name)
{
- struct amd_gpu_scheduler *sched;
- char name[20];
-
- sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
- if (!sched)
- return NULL;
-
- sched->device = device;
sched->ops = ops;
- sched->granularity = granularity;
- sched->ring_id = ring;
- sched->preemption = preemption;
sched->hw_submission_limit = hw_submission;
- snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
- mutex_init(&sched->sched_lock);
- spin_lock_init(&sched->queue_lock);
+ sched->name = name;
amd_sched_rq_init(&sched->sched_rq);
amd_sched_rq_init(&sched->kernel_rq);
- init_waitqueue_head(&sched->wait_queue);
- INIT_LIST_HEAD(&sched->active_hw_rq);
- atomic64_set(&sched->hw_rq_count, 0);
+ init_waitqueue_head(&sched->wake_up_worker);
+ init_waitqueue_head(&sched->job_scheduled);
+ atomic_set(&sched->hw_rq_count, 0);
+
/* Each scheduler will run on a seperate kernel thread */
- sched->thread = kthread_create(amd_sched_main, sched, name);
- if (sched->thread) {
- wake_up_process(sched->thread);
- return sched;
+ sched->thread = kthread_run(amd_sched_main, sched, sched->name);
+ if (IS_ERR(sched->thread)) {
+ DRM_ERROR("Failed to create scheduler for %s.\n", name);
+ return PTR_ERR(sched->thread);
}
- DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
- kfree(sched);
- return NULL;
+ return 0;
}
/**
* Destroy a gpu scheduler
*
* @sched The pointer to the scheduler
- *
- * return 0 if succeed. -1 if failed.
*/
-int amd_sched_destroy(struct amd_gpu_scheduler *sched)
+void amd_sched_fini(struct amd_gpu_scheduler *sched)
{
kthread_stop(sched->thread);
- kfree(sched);
- return 0;
-}
-
-/**
- * Get next queued sequence number
- *
- * @entity The context entity
- *
- * return the next queued sequence number
-*/
-uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
-{
- return atomic64_read(&c_entity->last_queued_v_seq) + 1;
}
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index ceb5918bfbeb..80b64dc22214 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,8 +27,6 @@
#include <linux/kfifo.h>
#include <linux/fence.h>
-#define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
-
struct amd_gpu_scheduler;
struct amd_sched_rq;
@@ -40,21 +38,17 @@ struct amd_sched_rq;
*/
struct amd_sched_entity {
struct list_head list;
- struct amd_sched_rq *belongto_rq;
- spinlock_t lock;
- /* the virtual_seq is unique per context per ring */
- atomic64_t last_queued_v_seq;
- atomic64_t last_signaled_v_seq;
- /* the job_queue maintains the jobs submitted by clients */
- struct kfifo job_queue;
+ struct amd_sched_rq *rq;
+ struct amd_gpu_scheduler *sched;
+
spinlock_t queue_lock;
- struct amd_gpu_scheduler *scheduler;
- wait_queue_head_t wait_queue;
- wait_queue_head_t wait_emit;
- bool is_pending;
+ struct kfifo job_queue;
+
+ atomic_t fence_seq;
uint64_t fence_context;
- char name[20];
- bool need_wakeup;
+
+ struct fence *dependency;
+ struct fence_cb cb;
};
/**
@@ -63,7 +57,7 @@ struct amd_sched_entity {
* the next entity to emit commands from.
*/
struct amd_sched_rq {
- struct mutex lock;
+ spinlock_t lock;
struct list_head entities;
struct amd_sched_entity *current_entity;
};
@@ -71,18 +65,16 @@ struct amd_sched_rq {
struct amd_sched_fence {
struct fence base;
struct fence_cb cb;
- struct amd_sched_entity *entity;
- uint64_t v_seq;
+ struct amd_gpu_scheduler *sched;
spinlock_t lock;
+ void *owner;
};
struct amd_sched_job {
- struct list_head list;
- struct fence_cb cb;
struct amd_gpu_scheduler *sched;
struct amd_sched_entity *s_entity;
- void *data;
struct amd_sched_fence *s_fence;
+ void *owner;
};
extern const struct fence_ops amd_sched_fence_ops;
@@ -101,61 +93,40 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
* these functions should be implemented in driver side
*/
struct amd_sched_backend_ops {
- int (*prepare_job)(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- struct amd_sched_job *job);
- struct fence *(*run_job)(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- struct amd_sched_job *job);
- void (*process_job)(struct amd_gpu_scheduler *sched,
- struct amd_sched_job *job);
+ struct fence *(*dependency)(struct amd_sched_job *sched_job);
+ struct fence *(*run_job)(struct amd_sched_job *sched_job);
};
/**
* One scheduler is implemented for each hardware ring
*/
struct amd_gpu_scheduler {
- void *device;
- struct task_struct *thread;
+ struct amd_sched_backend_ops *ops;
+ uint32_t hw_submission_limit;
+ const char *name;
struct amd_sched_rq sched_rq;
struct amd_sched_rq kernel_rq;
- struct list_head active_hw_rq;
- atomic64_t hw_rq_count;
- struct amd_sched_backend_ops *ops;
- uint32_t ring_id;
- uint32_t granularity; /* in ms unit */
- uint32_t preemption;
- wait_queue_head_t wait_queue;
- struct amd_sched_entity *current_entity;
- struct mutex sched_lock;
- spinlock_t queue_lock;
- uint32_t hw_submission_limit;
+ wait_queue_head_t wake_up_worker;
+ wait_queue_head_t job_scheduled;
+ atomic_t hw_rq_count;
+ struct task_struct *thread;
};
-struct amd_gpu_scheduler *amd_sched_create(void *device,
- struct amd_sched_backend_ops *ops,
- uint32_t ring,
- uint32_t granularity,
- uint32_t preemption,
- uint32_t hw_submission);
-int amd_sched_destroy(struct amd_gpu_scheduler *sched);
-
-int amd_sched_push_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- void *data,
- struct amd_sched_fence **fence);
+int amd_sched_init(struct amd_gpu_scheduler *sched,
+ struct amd_sched_backend_ops *ops,
+ uint32_t hw_submission, const char *name);
+void amd_sched_fini(struct amd_gpu_scheduler *sched);
int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity,
struct amd_sched_rq *rq,
uint32_t jobs);
-int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity);
-
-uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity);
+void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
+ struct amd_sched_entity *entity);
+int amd_sched_entity_push_job(struct amd_sched_job *sched_job);
struct amd_sched_fence *amd_sched_fence_create(
- struct amd_sched_entity *s_entity);
+ struct amd_sched_entity *s_entity, void *owner);
void amd_sched_fence_signal(struct amd_sched_fence *fence);
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index a4751598c0b4..d802638094f4 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -27,19 +27,22 @@
#include <drm/drmP.h>
#include "gpu_scheduler.h"
-struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity)
+struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner)
{
struct amd_sched_fence *fence = NULL;
+ unsigned seq;
+
fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL);
if (fence == NULL)
return NULL;
- fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq);
- fence->entity = s_entity;
+ fence->owner = owner;
+ fence->sched = s_entity->sched;
spin_lock_init(&fence->lock);
- fence_init(&fence->base, &amd_sched_fence_ops,
- &fence->lock,
- s_entity->fence_context,
- fence->v_seq);
+
+ seq = atomic_inc_return(&s_entity->fence_seq);
+ fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
+ s_entity->fence_context, seq);
+
return fence;
}
@@ -60,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence)
static const char *amd_sched_fence_get_timeline_name(struct fence *f)
{
struct amd_sched_fence *fence = to_amd_sched_fence(f);
- return (const char *)fence->entity->name;
+ return (const char *)fence->sched->name;
}
static bool amd_sched_fence_enable_signaling(struct fence *f)
OpenPOWER on IntegriCloud