summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Gerrit Renker <gerrit@erg.abdn.ac.uk> 2008-09-04 07:30:19 +0200
committer: Gerrit Renker <gerrit@erg.abdn.ac.uk> 2008-09-04 07:45:41 +0200
commit: 2b81143aa3505e2460b24b357996c2f21840ea58 (patch)
tree: 37f752fb85c563f965655cec834bb289fd831137 /net
parent: 2f3e3bbad917c426d3aba03a535809e5699de156 (diff)
downloadtalos-obmc-linux-2b81143aa3505e2460b24b357996c2f21840ea58.tar.gz
talos-obmc-linux-2b81143aa3505e2460b24b357996c2f21840ea58.zip
dccp ccid-3: Always perform receiver RTT sampling
This updates the CCID-3 receiver in part with regard to errata 610 and 611 (http://www.rfc-editor.org/errata_list.php), which change RFC 4342 to use the Receive Rate as specified in rfc3448bis; this requires constantly sampling the RTT (or using a sender RTT). Doing so requires reusing the RX history structure after dealing with a loss. The patch does not resolve how to compute X_recv if the interval is less than 1 RTT. A FIXME has been added (and is resolved in a subsequent patch). Furthermore, since this is all TFRC-based functionality, the RTT estimation is now also performed by the dccp_tfrc_lib module. This further simplifies the CCID-3 code. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net')
-rw-r--r-- net/dccp/ccids/ccid3.c | 43
-rw-r--r-- net/dccp/ccids/ccid3.h | 2
-rw-r--r-- net/dccp/ccids/lib/packet_history.c | 60
-rw-r--r-- net/dccp/ccids/lib/packet_history.h | 17
4 files changed, 73 insertions, 49 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 04b183548aa8..8e64d9665a21 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -556,8 +556,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
* would bring X down to s/t_mbi. That is why we return
* X_recv according to rfc3448bis-06 for the moment.
*/
- u32 rtt = hcrx->rtt ? : DCCP_FALLBACK_RTT,
- s = tfrc_rx_hist_packet_size(&hcrx->hist);
+ u32 s = tfrc_rx_hist_packet_size(&hcrx->hist),
+ rtt = tfrc_rx_hist_rtt(&hcrx->hist);
hcrx->x_recv = scaled_div32(s, 2 * rtt);
break;
@@ -576,6 +576,11 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
break;
/* fall through */
case CCID3_FBACK_PERIODIC:
+ /*
+ * FIXME: check if delta is less than or equal to 1 RTT using
+ * the receiver RTT sample. This is described in Errata 610/611
+ * of RFC 4342 which reference section 6.2 of RFC 3448.
+ */
delta = ktime_us_delta(now, hcrx->tstamp_last_feedback);
if (delta <= 0)
DCCP_BUG("delta (%ld) <= 0", (long)delta);
@@ -633,8 +638,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
static u32 ccid3_first_li(struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- u32 x_recv, p, delta,
- s = tfrc_rx_hist_packet_size(&hcrx->hist);
+ u32 s = tfrc_rx_hist_packet_size(&hcrx->hist),
+ rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p, delta;
u64 fval;
/*
@@ -645,11 +650,6 @@ static u32 ccid3_first_li(struct sock *sk)
if (unlikely(hcrx->feedback == CCID3_FBACK_NONE))
return 5;
- if (hcrx->rtt == 0) {
- DCCP_WARN("No RTT estimate available, using fallback RTT\n");
- hcrx->rtt = DCCP_FALLBACK_RTT;
- }
-
delta = ktime_to_us(net_timedelta(hcrx->tstamp_last_feedback));
x_recv = scaled_div32(hcrx->hist.bytes_recvd, delta);
if (x_recv == 0) { /* would also trigger divide-by-zero */
@@ -661,7 +661,7 @@ static u32 ccid3_first_li(struct sock *sk)
x_recv = hcrx->x_recv;
}
- fval = scaled_div32(scaled_div(s, hcrx->rtt), x_recv);
+ fval = scaled_div32(scaled_div(s, rtt), x_recv);
p = tfrc_calc_x_reverse_lookup(fval);
ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
@@ -696,25 +696,10 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
return; /* done receiving */
/*
- * Handle data packets: RTT sampling and monitoring p
- */
- if (unlikely(!is_data_packet))
- goto update_records;
-
- if (!tfrc_lh_is_initialised(&hcrx->li_hist)) {
- const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->hist, skb);
- /*
- * Empty loss history: no loss so far, hence p stays 0.
- * Sample RTT values, since an RTT estimate is required for the
- * computation of p when the first loss occurs; RFC 3448, 6.3.1.
- */
- if (sample != 0)
- hcrx->rtt = tfrc_ewma(hcrx->rtt, sample, 9);
- }
- /*
* Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
*/
- if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
+ if (is_data_packet &&
+ SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
do_feedback = CCID3_FBACK_PERIODIC;
update_records:
@@ -744,7 +729,7 @@ static void ccid3_hc_rx_exit(struct sock *sk)
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
{
info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
- info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rtt;
+ info->tcpi_rcv_rtt = tfrc_rx_hist_rtt(&ccid3_hc_rx_sk(sk)->hist);
}
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
@@ -759,7 +744,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
if (len < sizeof(rx_info))
return -EINVAL;
rx_info.tfrcrx_x_recv = hcrx->x_recv;
- rx_info.tfrcrx_rtt = hcrx->rtt;
+ rx_info.tfrcrx_rtt = tfrc_rx_hist_rtt(&hcrx->hist);
rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hcrx->p_inverse);
len = sizeof(rx_info);
val = &rx_info;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 72e110a1100f..342235c57bf3 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -127,7 +127,6 @@ enum ccid3_fback_type {
* @last_counter - Tracks window counter (RFC 4342, 8.1)
* @feedback - The type of the feedback last sent
* @x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
- * @rtt - Receiver estimate of RTT
* @tstamp_last_feedback - Time at which last feedback was sent
* @hist - Packet history (loss detection + RTT sampling)
* @li_hist - Loss Interval database
@@ -137,7 +136,6 @@ struct ccid3_hc_rx_sock {
u8 last_counter:4;
enum ccid3_fback_type feedback:4;
u32 x_recv;
- u32 rtt;
ktime_t tstamp_last_feedback;
struct tfrc_rx_hist hist;
struct tfrc_loss_hist li_hist;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index ee34b4564242..e2e250aa5c89 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -151,14 +151,31 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
+
+static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
+{
+ struct tfrc_rx_hist_entry *tmp = h->ring[a];
+
+ h->ring[a] = h->ring[b];
+ h->ring[b] = tmp;
+}
+
static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
- const u8 idx_a = tfrc_rx_hist_index(h, a),
- idx_b = tfrc_rx_hist_index(h, b);
- struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
+ __tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
+ tfrc_rx_hist_index(h, b));
+}
- h->ring[idx_a] = h->ring[idx_b];
- h->ring[idx_b] = tmp;
+/**
+ * tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling
+ * This is called after loss detection has finished, when the history entry
+ * with the index of `loss_count' holds the highest-received sequence number.
+ * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
+ */
+static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
+{
+ __tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
+ h->loss_count = h->loss_start = 0;
}
/*
@@ -200,8 +217,7 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
if (dccp_loss_free(s2, s1, n1)) {
/* hole is filled: S0, S2, and S1 are consecutive */
- h->loss_count = 0;
- h->loss_start = tfrc_rx_hist_index(h, 1);
+ tfrc_rx_hist_resume_rtt_sampling(h);
} else
/* gap between S2 and S1: just update loss_prev */
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
@@ -254,8 +270,7 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
if (dccp_loss_free(s1, s2, n2)) {
/* entire hole filled by S0, S3, S1, S2 */
- h->loss_start = tfrc_rx_hist_index(h, 2);
- h->loss_count = 0;
+ tfrc_rx_hist_resume_rtt_sampling(h);
} else {
/* gap remains between S1 and S2 */
h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -299,8 +314,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
if (dccp_loss_free(s2, s3, n3)) {
/* no gap between S2 and S3: entire hole is filled */
- h->loss_start = tfrc_rx_hist_index(h, 3);
- h->loss_count = 0;
+ tfrc_rx_hist_resume_rtt_sampling(h);
} else {
/* gap between S2 and S3 */
h->loss_start = tfrc_rx_hist_index(h, 2);
@@ -340,6 +354,7 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
if (h->loss_count == 0) {
__do_track_loss(h, skb, ndp);
+ tfrc_rx_hist_sample_rtt(h, skb);
} else if (h->loss_count == 1) {
__one_after_loss(h, skb, ndp);
} else if (h->loss_count != 2) {
@@ -435,11 +450,24 @@ static inline struct tfrc_rx_hist_entry *
* Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
* to compute a sample with given data - calling function should check this.
*/
-u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
{
- u32 sample = 0,
- delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+ u32 sample = 0, delta_v;
+
+ /*
+ * When not to sample:
+ * - on non-data packets
+ * (RFC 4342, 8.1: CCVal only fully defined for data packets);
+ * - when no data packets have been received yet
+ * (FIXME: using sampled packet size as indicator here);
+ * - as long as there are gaps in the sequence space (pending loss).
+ */
+ if (!dccp_data_packet(skb) || h->packet_size == 0 ||
+ tfrc_rx_hist_loss_pending(h))
+ return;
+
+ delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+ tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
if (h->rtt_sample_prev == 2) { /* previous candidate stored */
@@ -479,6 +507,6 @@ u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
h->rtt_sample_prev = 0; /* use current entry as next reference */
keep_ref_for_next_time:
- return sample;
+ h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 9);
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index b7c87a1a2720..ba5832bbc348 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -91,6 +91,7 @@ struct tfrc_rx_hist_entry {
* @loss_count: Number of entries in circular history
* @loss_start: Movable index (for loss detection)
* @rtt_sample_prev: Used during RTT sampling, points to candidate entry
+ * @rtt_estimate: Receiver RTT estimate
* @packet_size: Packet size in bytes (as per RFC 3448, 3.1)
* @bytes_recvd: Number of bytes received since last sending feedback
*/
@@ -98,7 +99,10 @@ struct tfrc_rx_hist {
struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
u8 loss_count:2,
loss_start:2;
+ /* Receiver RTT sampling */
#define rtt_sample_prev loss_start
+ u32 rtt_estimate;
+ /* Receiver sampling of application payload lengths */
u32 packet_size,
bytes_recvd;
};
@@ -154,6 +158,15 @@ static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
return TCP_MIN_RCVMSS;
}
return h->packet_size;
+
+}
+static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
+{
+ if (h->rtt_estimate == 0) {
+ DCCP_WARN("No RTT estimate available, using fallback RTT\n");
+ return DCCP_FALLBACK_RTT;
+ }
+ return h->rtt_estimate;
}
extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
@@ -167,8 +180,8 @@ extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
struct sk_buff *skb, const u64 ndp,
u32 (*first_li)(struct sock *sk),
struct sock *sk);
-extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
- const struct sk_buff *skb);
+extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb);
extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
OpenPOWER on IntegriCloud