5 files changed, 120 insertions, 23 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 5507928c8fbe..f9a38f2cd470 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -110,6 +110,7 @@
 #define GLINT_DYN_CTL_CLEARPBA_M		BIT(1)
 #define GLINT_DYN_CTL_SWINT_TRIG_M		BIT(2)
 #define GLINT_DYN_CTL_ITR_INDX_S		3
+#define GLINT_DYN_CTL_INTERVAL_S		5
 #define GLINT_DYN_CTL_SW_ITR_INDX_M		ICE_M(0x3, 25)
 #define GLINT_DYN_CTL_INTENA_MSK_M		BIT(31)
 #define GLINT_ITR(_i, _INT)			(0x00154000 + ((_i) * 8192 + (_INT) * 4))
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index a1f523a9d39d..27c3760ae5cb 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1717,22 +1717,34 @@ static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
 static void
 ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector)
 {
-	u8 itr_gran = hw->itr_gran;
-
 	if (q_vector->num_ring_rx) {
 		struct ice_ring_container *rc = &q_vector->rx;
 
-		rc->itr = ITR_TO_REG(ICE_DFLT_RX_ITR, itr_gran);
+		/* keep an existing setting; use the default only if unset */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_RX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
 		rc->latency_range = ICE_LOW_LATENCY;
-		wr32(hw, GLINT_ITR(rc->itr_idx, vector), rc->itr);
+		wr32(hw, GLINT_ITR(rc->itr_idx, vector),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
 	}
 
 	if (q_vector->num_ring_tx) {
 		struct ice_ring_container *rc = &q_vector->tx;
 
-		rc->itr = ITR_TO_REG(ICE_DFLT_TX_ITR, itr_gran);
+		/* keep an existing setting; use the default only if unset */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_TX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
 		rc->latency_range = ICE_LOW_LATENCY;
-		wr32(hw, GLINT_ITR(rc->itr_idx, vector), rc->itr);
+		wr32(hw, GLINT_ITR(rc->itr_idx, vector),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 093708b5c0ef..e59f8b29af49 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1389,7 +1389,6 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
 	int oicr_idx, err = 0;
-	u8 itr_gran;
 	u32 val;
 
 	if (!pf->int_name[0])
@@ -1453,10 +1452,8 @@ skip_req_irq:
 	       PFINT_MBX_CTL_CAUSE_ENA_M);
 	wr32(hw, PFINT_MBX_CTL, val);
 
-	itr_gran = hw->itr_gran;
-
 	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx),
-	     ITR_TO_REG(ICE_ITR_8K, itr_gran));
+	     ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
 
 	ice_flush(hw);
 	ice_irq_dynamic_ena(hw, NULL, NULL);
@@ -1998,6 +1995,23 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
 }
 
 /**
+ * ice_verify_itr_gran - verify driver's assumption of ITR granularity
+ * @pf: pointer to the PF structure
+ *
+ * The driver assumes a granularity of (1 << ICE_ITR_GRAN_S) usecs so the hot
+ * path can use constants instead of reading structure members. A mismatch is
+ * not treated as an error because the driver keeps working, but interrupt
+ * moderation will be inaccurate, so a warning is logged.
+ */
+static void ice_verify_itr_gran(struct ice_pf *pf)
+{
+	if (pf->hw.itr_gran != (1 << ICE_ITR_GRAN_S))
+		dev_warn(&pf->pdev->dev,
+			 "%d ITR granularity assumption is invalid, actual ITR granularity is %d. Interrupt moderation will be inaccurate!\n",
+			 (1 << ICE_ITR_GRAN_S), pf->hw.itr_gran);
+}
+
+/**
  * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
  * @pf: pointer to the PF structure
  *
@@ -2163,6 +2177,7 @@ static int ice_probe(struct pci_dev *pdev,
 	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
 
 	ice_verify_cacheline_size(pf);
+	ice_verify_itr_gran(pf);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 49fc38094185..384ac5c82e00 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1053,6 +1053,69 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 }
 
 /**
+ * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
+ * @itr_idx: interrupt throttling index
+ * @reg_itr: ITR in usecs; masked to even and scaled by ICE_ITR_GRAN_S here
+ */
+static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
+{
+	return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+	       (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) | ((reg_itr & ICE_ITR_MASK)
+	       << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
+}
+
+/**
+ * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
+ * @vsi: the VSI associated with the q_vector
+ * @q_vector: q_vector for which ITR is being updated and interrupt enabled
+ */
+static void
+ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_ring_container *rc;
+	u32 itr_val;
+
+	/* At most one ITR value (Rx or Tx) is updated per interrupt, since
+	 * only one index/interval pair is built for the register write. The
+	 * update is chosen with the following criteria:
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state.
+	 * 2. Reducing an ITR always takes priority over increasing one.
+	 * 3. Otherwise the ITR needing the larger increase is updated.
+	 */
+	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+		rc = &q_vector->rx;
+		/* Rx ITR needs to be reduced, this is highest priority */
+		itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
+		rc->current_itr = rc->target_itr;
+	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+		rc = &q_vector->tx;
+		/* Tx ITR needs to be reduced, this is second priority; or
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
+		itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
+		rc->current_itr = rc->target_itr;
+	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+		rc = &q_vector->rx;
+		/* Rx ITR needs to be increased, third priority */
+		itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
+		rc->current_itr = rc->target_itr;
+	} else {
+		/* Nothing to update; still re-enable the interrupt */
+		itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+	}
+
+	if (!test_bit(__ICE_DOWN, vsi->state)) {
+		int vector = vsi->hw_base_vector + q_vector->v_idx;
+
+		wr32(hw, GLINT_DYN_CTL(vector), itr_val);
+	}
+}
+
+/**
  * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
  * @napi: napi struct with our devices info in it
  * @budget: amount of work driver is allowed to do this pass, in packets
@@ -1108,7 +1171,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	 */
 	if (likely(napi_complete_done(napi, work_done)))
 		if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
-			ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);
+			ice_update_ena_itr(vsi, q_vector);
 
 	return min(work_done, budget - 1);
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 75d0eaf6c9dd..aa646002d653 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -116,16 +116,15 @@ enum ice_rx_dtype {
 /* indices into GLINT_ITR registers */
 #define ICE_RX_ITR	ICE_IDX_ITR0
 #define ICE_TX_ITR	ICE_IDX_ITR1
-#define ICE_ITR_DYNAMIC	0x8000  /* use top bit as a flag */
-#define ICE_ITR_8K	125
+#define ICE_ITR_8K	124
 #define ICE_ITR_20K	50
-#define ICE_DFLT_TX_ITR	ICE_ITR_20K
-#define ICE_DFLT_RX_ITR	ICE_ITR_20K
-/* apply ITR granularity translation to program the register. itr_gran is either
- * 2 or 4 usecs so we need to divide by 2 first then shift by that value
- */
-#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> \
-				   ((itr_gran) / 2))
+#define ICE_DFLT_TX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
+#define ICE_DFLT_RX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
+#define ICE_ITR_DYNAMIC	0x8000  /* flag bit kept in itr_setting */
+#define ITR_TO_REG(setting)	((setting) & ~ICE_ITR_DYNAMIC)
+#define ICE_ITR_GRAN_S		1	/* shift for assumed 2us granularity */
+#define ICE_ITR_MASK		0x1FFE	/* bits 1-12: even ITR values only */
+#define ITR_REG_ALIGN(setting)	__ALIGN_MASK(setting, ~ICE_ITR_MASK)
 
 #define ICE_DFLT_INTRL	0
 
@@ -180,13 +179,20 @@ enum ice_latency_range {
 };
 
 struct ice_ring_container {
-	/* array of pointers to rings */
+	/* head of linked-list of rings */
 	struct ice_ring *ring;
+	unsigned long next_update;	/* jiffies value of next queue update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_pkts;	/* total packets processed this int */
 	enum ice_latency_range latency_range;
-	int itr_idx;	/* index in the interrupt vector */
-	u16 itr;
+	int itr_idx;		/* index in the interrupt vector */
+	u16 target_itr;		/* usecs; desired ITR, flag bit stripped */
+	u16 current_itr;	/* usecs; tracks the last target applied */
+	/* high bit set means dynamic ITR, rest is used to store user
+	 * readable ITR value in usecs and must be converted before programming
+	 * to a register.
+	 */
+	u16 itr_setting;
 };
 
 /* iterator for handling rings in ring container */