diff options
author | Johannes Berg <johannes.berg@intel.com> | 2012-03-19 17:12:06 +0100 |
---|---|---|
committer | Wey-Yi Guy <wey-yi.w.guy@intel.com> | 2012-04-18 07:32:28 -0700 |
commit | bf8440e6a6f5fabf7843dbfecb1745e49182fa1c (patch) | |
tree | 7c7e8168f65d4af3bf89617d7727264e81d4fc83 /drivers/net/wireless/iwlwifi/iwl-trans-pcie.c | |
parent | 682e5f64de0ab5be3fb2de9f66a1da87de48ec09 (diff) | |
download | blackbird-obmc-linux-bf8440e6a6f5fabf7843dbfecb1745e49182fa1c.tar.gz blackbird-obmc-linux-bf8440e6a6f5fabf7843dbfecb1745e49182fa1c.zip |
iwlwifi: improve TX cache footprint
Having cmd[], meta[] and skbs[] as separate arrays
in the TX queue structure is cache inefficient as
we need the data for a given entry together.
To improve this, create an array with these three
members (allocate meta as part of that struct) so
we have the data we need together located together
improving cache footprint.
The downside is that we need to allocate a lot of
memory in one chunk, about 10KiB (on 64-bit) which
isn't very efficient.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Diffstat (limited to 'drivers/net/wireless/iwlwifi/iwl-trans-pcie.c')
-rw-r--r-- | drivers/net/wireless/iwlwifi/iwl-trans-pcie.c | 65 |
1 files changed, 19 insertions, 46 deletions
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c index bc610f94ea2d..e6401e8a8d4c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c +++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c @@ -333,7 +333,7 @@ static int iwl_trans_txq_alloc(struct iwl_trans *trans, int i; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (WARN_ON(txq->meta || txq->cmd || txq->skbs || txq->tfds)) + if (WARN_ON(txq->entries || txq->tfds)) return -EINVAL; setup_timer(&txq->stuck_timer, iwl_trans_pcie_queue_stuck_timer, @@ -342,35 +342,22 @@ static int iwl_trans_txq_alloc(struct iwl_trans *trans, txq->q.n_window = slots_num; - txq->meta = kcalloc(slots_num, sizeof(txq->meta[0]), GFP_KERNEL); - txq->cmd = kcalloc(slots_num, sizeof(txq->cmd[0]), GFP_KERNEL); + txq->entries = kcalloc(slots_num, + sizeof(struct iwl_pcie_tx_queue_entry), + GFP_KERNEL); - if (!txq->meta || !txq->cmd) + if (!txq->entries) goto error; if (txq_id == trans_pcie->cmd_queue) for (i = 0; i < slots_num; i++) { - txq->cmd[i] = kmalloc(sizeof(struct iwl_device_cmd), - GFP_KERNEL); - if (!txq->cmd[i]) + txq->entries[i].cmd = + kmalloc(sizeof(struct iwl_device_cmd), + GFP_KERNEL); + if (!txq->entries[i].cmd) goto error; } - /* Alloc driver data array and TFD circular buffer */ - /* Driver private data, only for Tx (not command) queues, - * not shared with device. */ - if (txq_id != trans_pcie->cmd_queue) { - txq->skbs = kcalloc(TFD_QUEUE_SIZE_MAX, sizeof(txq->skbs[0]), - GFP_KERNEL); - if (!txq->skbs) { - IWL_ERR(trans, "kmalloc for auxiliary BD " - "structures failed\n"); - goto error; - } - } else { - txq->skbs = NULL; - } - /* Circular buffer of transmit frame descriptors (TFDs), * shared with device */ txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz, @@ -383,17 +370,11 @@ static int iwl_trans_txq_alloc(struct iwl_trans *trans, return 0; error: - kfree(txq->skbs); - txq->skbs = NULL; - /* since txq->cmd has been zeroed, - * all non allocated cmd[i] will be NULL */ - if (txq->cmd && txq_id == trans_pcie->cmd_queue) + if (txq->entries && txq_id == trans_pcie->cmd_queue) for (i = 0; i < slots_num; i++) - kfree(txq->cmd[i]); - kfree(txq->meta); - kfree(txq->cmd); - txq->meta = NULL; - txq->cmd = NULL; + kfree(txq->entries[i].cmd); + kfree(txq->entries); + txq->entries = NULL; return -ENOMEM; @@ -405,7 +386,6 @@ static int iwl_trans_txq_init(struct iwl_trans *trans, struct iwl_tx_queue *txq, int ret; txq->need_update = 0; - memset(txq->meta, 0, sizeof(txq->meta[0]) * slots_num); /* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */ @@ -483,7 +463,7 @@ static void iwl_tx_queue_free(struct iwl_trans *trans, int txq_id) if (txq_id == trans_pcie->cmd_queue) for (i = 0; i < txq->q.n_window; i++) - kfree(txq->cmd[i]); + kfree(txq->entries[i].cmd); /* De-alloc circular buffer of TFDs */ if (txq->q.n_bd) { @@ -492,15 +472,8 @@ static void iwl_tx_queue_free(struct iwl_trans *trans, int txq_id) memset(&txq->q.dma_addr, 0, sizeof(txq->q.dma_addr)); } - /* De-alloc array of per-TFD driver data */ - kfree(txq->skbs); - txq->skbs = NULL; - - /* deallocate arrays */ - kfree(txq->cmd); - kfree(txq->meta); - txq->cmd = NULL; - txq->meta = NULL; + kfree(txq->entries); + txq->entries = NULL; del_timer_sync(&txq->stuck_timer); @@ -1295,15 +1268,15 @@ static int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, spin_lock(&txq->lock); /* Set up driver data for this TFD */ - txq->skbs[q->write_ptr] = skb; - txq->cmd[q->write_ptr] = dev_cmd; + txq->entries[q->write_ptr].skb = skb; + txq->entries[q->write_ptr].cmd = dev_cmd; dev_cmd->hdr.cmd = REPLY_TX; dev_cmd->hdr.sequence = cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) | INDEX_TO_SEQ(q->write_ptr))); /* Set up first empty entry in queue's array of Tx/cmd buffers */ - out_meta = &txq->meta[q->write_ptr]; + out_meta = &txq->entries[q->write_ptr].meta; /* * Use the first empty entry in this queue's command buffer array |