author      Dave Airlie <airlied@redhat.com>    2009-09-23 16:56:27 +1000
committer   Dave Airlie <airlied@linux.ie>      2009-09-25 13:08:18 +1000
commit      513bcb4655e68706594e45dfa1d4b181500110ba (patch)
tree        ed457db4cfb202015866a131ad4e742503728fad /drivers/gpu/drm/radeon/r300.c
parent      35e4b7af21d77933abda3d41d1672589eb6c960c (diff)
drm/radeon/kms: don't require up to 64k allocations. (v2)
This avoids needing to do a kmalloc > PAGE_SIZE for the main indirect buffer chunk: it adds an accessor for all reads from the chunk and caches a single page at a time for subsequent reads.

Changes since v1:
- Use a two-page pool, which should cover the most common case in which a single packet spanning > PAGE_SIZE would be hit; I'm having trouble seeing anywhere we currently generate anything like that.
- Hopefully proper short-page copying at the end of the chunk.
- Added a parser_error flag to record deep errors instead of having to test every ib value fetch.
- Fixed a bug in the patch that went to the list.

Signed-off-by: Dave Airlie <airlied@redhat.com>
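For context on the diff below: the accessor the commit message refers to (radeon_get_ib_value() in the register-check code) replaces direct ib_chunk->kdata[idx] indexing with a read that goes through a small page cache. What follows is a minimal, self-contained userspace sketch of that pattern only, written under the assumptions noted in the comments; the names (ib_reader, ib_read_dword, IB_PAGE_SIZE, ...) are hypothetical and this is not the kernel's actual implementation.

/* Illustrative sketch only: a two-page read cache in front of a large
 * dword buffer, mirroring the access pattern the commit message
 * describes (two-page pool, short copy at the end, deep error flag).
 * All names below are hypothetical. */
#include <stdint.h>
#include <string.h>

#define IB_PAGE_SIZE    4096u
#define DWORDS_PER_PAGE (IB_PAGE_SIZE / 4u)

struct ib_reader {
    const uint32_t *src;                 /* full command stream to read from */
    uint32_t length_dw;                  /* stream length in dwords */
    uint32_t page[2][DWORDS_PER_PAGE];   /* the two-page pool */
    int32_t page_idx[2];                 /* source page cached in each slot, -1 = empty */
    int last_slot;                       /* slot filled most recently */
    int error;                           /* "parser_error"-style deep error flag */
};

static void ib_reader_init(struct ib_reader *r, const uint32_t *src,
                           uint32_t length_dw)
{
    r->src = src;
    r->length_dw = length_dw;
    r->page_idx[0] = -1;
    r->page_idx[1] = -1;
    r->last_slot = 0;
    r->error = 0;
}

/* Fetch one dword; on a cache miss, copy one page (or a short tail)
 * into the least recently filled slot. */
static uint32_t ib_read_dword(struct ib_reader *r, uint32_t idx)
{
    uint32_t pg = idx / DWORDS_PER_PAGE;
    uint32_t off = idx % DWORDS_PER_PAGE;
    uint32_t start, count;
    int slot;

    if (idx >= r->length_dw) {
        r->error = -1;                   /* record the error, let the caller check once */
        return 0;
    }
    if (r->page_idx[0] == (int32_t)pg)
        return r->page[0][off];
    if (r->page_idx[1] == (int32_t)pg)
        return r->page[1][off];

    slot = r->last_slot ^ 1;             /* refill the older of the two slots */
    r->last_slot = slot;
    start = pg * DWORDS_PER_PAGE;
    count = r->length_dw - start;
    if (count > DWORDS_PER_PAGE)
        count = DWORDS_PER_PAGE;         /* short copy on the last page */
    memcpy(r->page[slot], r->src + start, count * sizeof(uint32_t));
    r->page_idx[slot] = (int32_t)pg;
    return r->page[slot][off];
}

A caller walks a packet with ib_read_dword() and checks r->error once afterwards, which is the point of the parser_error flag the message mentions: deep errors are recorded where they happen instead of being tested on every fetch.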
Diffstat (limited to 'drivers/gpu/drm/radeon/r300.c')
-rw-r--r--  drivers/gpu/drm/radeon/r300.c | 137
 1 file changed, 49 insertions(+), 88 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index bb151ecdf8fc..1ebea8cc8c93 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -697,17 +697,18 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg)
{
- struct radeon_cs_chunk *ib_chunk;
struct radeon_cs_reloc *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp, tile_flags = 0;
unsigned i;
int r;
+ u32 idx_value;
ib = p->ib->ptr;
- ib_chunk = &p->chunks[p->chunk_ib_idx];
track = (struct r100_cs_track *)p->track;
+ idx_value = radeon_get_ib_value(p, idx);
+
switch(reg) {
case AVIVO_D1MODE_VLINE_START_END:
case RADEON_CRTC_GUI_TRIG_VLINE:
@@ -738,8 +739,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
return r;
}
track->cb[i].robj = reloc->robj;
- track->cb[i].offset = ib_chunk->kdata[idx];
- ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+ track->cb[i].offset = idx_value;
+ ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
break;
case R300_ZB_DEPTHOFFSET:
r = r100_cs_packet_next_reloc(p, &reloc);
@@ -750,8 +751,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
return r;
}
track->zb.robj = reloc->robj;
- track->zb.offset = ib_chunk->kdata[idx];
- ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+ track->zb.offset = idx_value;
+ ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
break;
case R300_TX_OFFSET_0:
case R300_TX_OFFSET_0+4:
@@ -777,32 +778,32 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
r100_cs_dump_packet(p, pkt);
return r;
}
- ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+ ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
track->textures[i].robj = reloc->robj;
break;
/* Tracked registers */
case 0x2084:
/* VAP_VF_CNTL */
- track->vap_vf_cntl = ib_chunk->kdata[idx];
+ track->vap_vf_cntl = idx_value;
break;
case 0x20B4:
/* VAP_VTX_SIZE */
- track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
+ track->vtx_size = idx_value & 0x7F;
break;
case 0x2134:
/* VAP_VF_MAX_VTX_INDX */
- track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
+ track->max_indx = idx_value & 0x00FFFFFFUL;
break;
case 0x43E4:
/* SC_SCISSOR1 */
- track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
+ track->maxy = ((idx_value >> 13) & 0x1FFF) + 1;
if (p->rdev->family < CHIP_RV515) {
track->maxy -= 1440;
}
break;
case 0x4E00:
/* RB3D_CCTL */
- track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
+ track->num_cb = ((idx_value >> 5) & 0x3) + 1;
break;
case 0x4E38:
case 0x4E3C:
@@ -825,13 +826,13 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R300_COLOR_MICROTILE_ENABLE;
- tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+ tmp = idx_value & ~(0x7 << 16);
tmp |= tile_flags;
ib[idx] = tmp;
i = (reg - 0x4E38) >> 2;
- track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
- switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
+ track->cb[i].pitch = idx_value & 0x3FFE;
+ switch (((idx_value >> 21) & 0xF)) {
case 9:
case 11:
case 12:
@@ -854,13 +855,13 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
break;
default:
DRM_ERROR("Invalid color buffer format (%d) !\n",
- ((ib_chunk->kdata[idx] >> 21) & 0xF));
+ ((idx_value >> 21) & 0xF));
return -EINVAL;
}
break;
case 0x4F00:
/* ZB_CNTL */
- if (ib_chunk->kdata[idx] & 2) {
+ if (idx_value & 2) {
track->z_enabled = true;
} else {
track->z_enabled = false;
@@ -868,7 +869,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
break;
case 0x4F10:
/* ZB_FORMAT */
- switch ((ib_chunk->kdata[idx] & 0xF)) {
+ switch ((idx_value & 0xF)) {
case 0:
case 1:
track->zb.cpp = 2;
@@ -878,7 +879,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
break;
default:
DRM_ERROR("Invalid z buffer format (%d) !\n",
- (ib_chunk->kdata[idx] & 0xF));
+ (idx_value & 0xF));
return -EINVAL;
}
break;
@@ -897,17 +898,17 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R300_DEPTHMICROTILE_TILED;;
- tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+ tmp = idx_value & ~(0x7 << 16);
tmp |= tile_flags;
ib[idx] = tmp;
- track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
+ track->zb.pitch = idx_value & 0x3FFC;
break;
case 0x4104:
for (i = 0; i < 16; i++) {
bool enabled;
- enabled = !!(ib_chunk->kdata[idx] & (1 << i));
+ enabled = !!(idx_value & (1 << i));
track->textures[i].enabled = enabled;
}
break;
@@ -929,9 +930,9 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
case 0x44FC:
/* TX_FORMAT1_[0-15] */
i = (reg - 0x44C0) >> 2;
- tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
+ tmp = (idx_value >> 25) & 0x3;
track->textures[i].tex_coord_type = tmp;
- switch ((ib_chunk->kdata[idx] & 0x1F)) {
+ switch ((idx_value & 0x1F)) {
case R300_TX_FORMAT_X8:
case R300_TX_FORMAT_Y4X4:
case R300_TX_FORMAT_Z3Y3X2:
@@ -971,7 +972,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
break;
default:
DRM_ERROR("Invalid texture format %u\n",
- (ib_chunk->kdata[idx] & 0x1F));
+ (idx_value & 0x1F));
return -EINVAL;
break;
}
@@ -994,11 +995,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
case 0x443C:
/* TX_FILTER0_[0-15] */
i = (reg - 0x4400) >> 2;
- tmp = ib_chunk->kdata[idx] & 0x7;
+ tmp = idx_value & 0x7;
if (tmp == 2 || tmp == 4 || tmp == 6) {
track->textures[i].roundup_w = false;
}
- tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
+ tmp = (idx_value >> 3) & 0x7;
if (tmp == 2 || tmp == 4 || tmp == 6) {
track->textures[i].roundup_h = false;
}
@@ -1021,12 +1022,12 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
case 0x453C:
/* TX_FORMAT2_[0-15] */
i = (reg - 0x4500) >> 2;
- tmp = ib_chunk->kdata[idx] & 0x3FFF;
+ tmp = idx_value & 0x3FFF;
track->textures[i].pitch = tmp + 1;
if (p->rdev->family >= CHIP_RV515) {
- tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
+ tmp = ((idx_value >> 15) & 1) << 11;
track->textures[i].width_11 = tmp;
- tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
+ tmp = ((idx_value >> 16) & 1) << 11;
track->textures[i].height_11 = tmp;
}
break;
@@ -1048,15 +1049,15 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
case 0x44BC:
/* TX_FORMAT0_[0-15] */
i = (reg - 0x4480) >> 2;
- tmp = ib_chunk->kdata[idx] & 0x7FF;
+ tmp = idx_value & 0x7FF;
track->textures[i].width = tmp + 1;
- tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
+ tmp = (idx_value >> 11) & 0x7FF;
track->textures[i].height = tmp + 1;
- tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
+ tmp = (idx_value >> 26) & 0xF;
track->textures[i].num_levels = tmp;
- tmp = ib_chunk->kdata[idx] & (1 << 31);
+ tmp = idx_value & (1 << 31);
track->textures[i].use_pitch = !!tmp;
- tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
+ tmp = (idx_value >> 22) & 0xF;
track->textures[i].txdepth = tmp;
break;
case R300_ZB_ZPASS_ADDR:
@@ -1067,7 +1068,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
r100_cs_dump_packet(p, pkt);
return r;
}
- ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+ ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
break;
case 0x4be8:
/* valid register only on RV530 */
@@ -1085,60 +1086,20 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
static int r300_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
- struct radeon_cs_chunk *ib_chunk;
-
struct radeon_cs_reloc *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
unsigned idx;
- unsigned i, c;
int r;
ib = p->ib->ptr;
- ib_chunk = &p->chunks[p->chunk_ib_idx];
idx = pkt->idx + 1;
track = (struct r100_cs_track *)p->track;
switch(pkt->opcode) {
case PACKET3_3D_LOAD_VBPNTR:
- c = ib_chunk->kdata[idx++] & 0x1F;
- track->num_arrays = c;
- for (i = 0; i < (c - 1); i+=2, idx+=3) {
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
- track->arrays[i + 0].robj = reloc->robj;
- track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
- track->arrays[i + 0].esize &= 0x7F;
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
- track->arrays[i + 1].robj = reloc->robj;
- track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
- track->arrays[i + 1].esize &= 0x7F;
- }
- if (c & 1) {
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
- track->arrays[i + 0].robj = reloc->robj;
- track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
- track->arrays[i + 0].esize &= 0x7F;
- }
+ r = r100_packet3_load_vbpntr(p, pkt, idx);
+ if (r)
+ return r;
break;
case PACKET3_INDX_BUFFER:
r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1147,7 +1108,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
r100_cs_dump_packet(p, pkt);
return r;
}
- ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
+ ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
if (r) {
return r;
@@ -1158,11 +1119,11 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
/* Number of dwords is vtx_size * (num_vertices - 1)
* PRIM_WALK must be equal to 3 vertex data in embedded
* in cmd stream */
- if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
+ if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
return -EINVAL;
}
- track->vap_vf_cntl = ib_chunk->kdata[idx+1];
+ track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
track->immd_dwords = pkt->count - 1;
r = r100_cs_track_check(p->rdev, track);
if (r) {
@@ -1173,11 +1134,11 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
/* Number of dwords is vtx_size * (num_vertices - 1)
* PRIM_WALK must be equal to 3 vertex data in embedded
* in cmd stream */
- if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
+ if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
return -EINVAL;
}
- track->vap_vf_cntl = ib_chunk->kdata[idx];
+ track->vap_vf_cntl = radeon_get_ib_value(p, idx);
track->immd_dwords = pkt->count;
r = r100_cs_track_check(p->rdev, track);
if (r) {
@@ -1185,28 +1146,28 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
}
break;
case PACKET3_3D_DRAW_VBUF:
- track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
+ track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
r = r100_cs_track_check(p->rdev, track);
if (r) {
return r;
}
break;
case PACKET3_3D_DRAW_VBUF_2:
- track->vap_vf_cntl = ib_chunk->kdata[idx];
+ track->vap_vf_cntl = radeon_get_ib_value(p, idx);
r = r100_cs_track_check(p->rdev, track);
if (r) {
return r;
}
break;
case PACKET3_3D_DRAW_INDX:
- track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
+ track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
r = r100_cs_track_check(p->rdev, track);
if (r) {
return r;
}
break;
case PACKET3_3D_DRAW_INDX_2:
- track->vap_vf_cntl = ib_chunk->kdata[idx];
+ track->vap_vf_cntl = radeon_get_ib_value(p, idx);
r = r100_cs_track_check(p->rdev, track);
if (r) {
return r;