diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-04-27 09:26:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-04-27 09:26:46 -0700 |
commit | 15c54033964a943de7b0763efd3bd0ede7326395 (patch) | |
tree | 840b292612d1b5396d5bab5bde537a9013db3ceb /net/dccp | |
parent | ad5da3cf39a5b11a198929be1f2644e17ecd767e (diff) | |
parent | 912a41a4ab935ce8c4308428ec13fc7f8b1f18f4 (diff) | |
download | blackbird-op-linux-15c54033964a943de7b0763efd3bd0ede7326395.tar.gz blackbird-op-linux-15c54033964a943de7b0763efd3bd0ede7326395.zip |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (448 commits)
[IPV4] nl_fib_lookup: Initialise res.r before fib_res_put(&res)
[IPV6]: Fix thinko in ipv6_rthdr_rcv() changes.
[IPV4]: Add multipath cached to feature-removal-schedule.txt
[WIRELESS] cfg80211: Clarify locking comment.
[WIRELESS] cfg80211: Fix locking in wiphy_new.
[WEXT] net_device: Don't include wext bits if not required.
[WEXT]: Misc code cleanups.
[WEXT]: Reduce inline abuse.
[WEXT]: Move EXPORT_SYMBOL statements where they belong.
[WEXT]: Cleanup early ioctl call path.
[WEXT]: Remove options.
[WEXT]: Remove dead debug code.
[WEXT]: Clean up how wext is called.
[WEXT]: Move to net/wireless
[AFS]: Eliminate cmpxchg() usage in vlocation code.
[RXRPC]: Fix pointers passed to bitops.
[RXRPC]: Remove bogus atomic_* overrides.
[AFS]: Fix u64 printing in debug logging.
[AFS]: Add "directory write" support.
[AFS]: Implement the CB.InitCallBackState3 operation.
...
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/ackvec.c | 2 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 322 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 10 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 2 | ||||
-rw-r--r-- | net/dccp/dccp.h | 75 | ||||
-rw-r--r-- | net/dccp/input.c | 54 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 43 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 40 | ||||
-rw-r--r-- | net/dccp/minisocks.c | 2 | ||||
-rw-r--r-- | net/dccp/options.c | 18 | ||||
-rw-r--r-- | net/dccp/output.c | 3 | ||||
-rw-r--r-- | net/dccp/probe.c | 17 |
12 files changed, 325 insertions, 263 deletions
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index a086c6312d3b..01030f346177 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -157,7 +157,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) if (av != NULL) { av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; - av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; + av->dccpav_buf_ackno = UINT48_MAX + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; av->dccpav_time.tv_sec = 0; av->dccpav_time.tv_usec = 0; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 746f79d104b3..d7d9ce737244 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -33,7 +33,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - #include "../ccid.h" #include "../dccp.h" #include "lib/packet_history.h" @@ -52,6 +51,9 @@ static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; static struct dccp_li_hist *ccid3_li_hist; +/* + * Transmitter Half-Connection Routines + */ #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) { @@ -80,23 +82,37 @@ static void ccid3_hc_tx_set_state(struct sock *sk, } /* - * Recalculate scheduled nominal send time t_nom, inter-packet interval - * t_ipi, and delta value. Should be called after each change to X. + * Compute the initial sending rate X_init according to RFC 3390: + * w_init = min(4 * MSS, max(2 * MSS, 4380 bytes)) + * X_init = w_init / RTT + * For consistency with other parts of the code, X_init is scaled by 2^6. */ -static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) +static inline u64 rfc3390_initial_rate(struct sock *sk) { - timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); + const struct dccp_sock *dp = dccp_sk(sk); + const __u32 w_init = min(4 * dp->dccps_mss_cache, + max(2 * dp->dccps_mss_cache, 4380U)); - /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ - hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x >> 6); + return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt); +} - /* Update nominal send time with regard to the new t_ipi */ - timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); +/* + * Recalculate t_ipi and delta (should be called whenever X changes) + */ +static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) +{ + /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ + hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_x); /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); + + ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", + hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta, + hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6)); + } /* * Update X by @@ -112,19 +128,28 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) * fine-grained resolution of sending rates. This requires scaling by 2^6 * throughout the code. Only X_calc is unscaled (in bytes/second). * - * If X has changed, we also update the scheduled send time t_now, - * the inter-packet interval t_ipi, and the delta value. */ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; const __u64 old_x = hctx->ccid3hctx_x; + /* + * Handle IDLE periods: do not reduce below RFC3390 initial sending rate + * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis. + * For consistency with X and X_recv, min_rate is also scaled by 2^6. + */ + if (unlikely(hctx->ccid3hctx_idle)) { + min_rate = rfc3390_initial_rate(sk); + min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); + } + if (hctx->ccid3hctx_p > 0) { hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, - hctx->ccid3hctx_x_recv * 2); + min_rate); hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, (((__u64)hctx->ccid3hctx_s) << 6) / TFRC_T_MBI); @@ -133,14 +158,21 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) (suseconds_t)hctx->ccid3hctx_rtt >= 0) { hctx->ccid3hctx_x = - max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv), + max(min(2 * hctx->ccid3hctx_x, min_rate), scaled_div(((__u64)hctx->ccid3hctx_s) << 6, hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = *now; } - if (hctx->ccid3hctx_x != old_x) - ccid3_update_send_time(hctx); + if (hctx->ccid3hctx_x != old_x) { + ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " + "X_recv=%u\n", (unsigned)(old_x >> 6), + (unsigned)(hctx->ccid3hctx_x >> 6), + hctx->ccid3hctx_x_calc, + (unsigned)(hctx->ccid3hctx_x_recv >> 6)); + + ccid3_update_send_interval(hctx); + } } /* @@ -149,17 +181,12 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) */ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) { - if (unlikely(len == 0)) - ccid3_pr_debug("Packet payload length is 0 - not updating\n"); - else - hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len : - (9 * hctx->ccid3hctx_s + len) / 10; - /* - * Note: We could do a potential optimisation here - when `s' changes, - * recalculate sending rate and consequently t_ipi, t_delta, and - * t_now. This is however non-standard, and the benefits are not - * clear, so it is currently left out. - */ + const u16 old_s = hctx->ccid3hctx_s; + + hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10; + + if (hctx->ccid3hctx_s != old_s) + ccid3_update_send_interval(hctx); } /* @@ -193,6 +220,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct timeval now; unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -205,6 +233,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); + hctx->ccid3hctx_idle = 1; + switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_FBACK: /* RFC 3448, 4.4: Halve send rate directly */ @@ -219,53 +249,37 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) /* The value of R is still undefined and so we can not recompute * the timout value. Keep initial value as per [RFC 4342, 5]. */ t_nfb = TFRC_INITIAL_TIMEOUT; - ccid3_update_send_time(hctx); + ccid3_update_send_interval(hctx); break; case TFRC_SSTATE_FBACK: /* - * Check if IDLE since last timeout and recv rate is less than - * 4 packets (in units of 64*bytes/sec) per RTT + * Modify the cached value of X_recv [RFC 3448, 4.4] + * + * If (p == 0 || X_calc > 2 * X_recv) + * X_recv = max(X_recv / 2, s / (2 * t_mbi)); + * Else + * X_recv = X_calc / 4; + * + * Note that X_recv is scaled by 2^6 while X_calc is not */ - if (!hctx->ccid3hctx_idle || - (hctx->ccid3hctx_x_recv >= 4 * - scaled_div(((__u64)hctx->ccid3hctx_s) << 6, - hctx->ccid3hctx_rtt))) { - struct timeval now; + BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); - ccid3_pr_debug("%s(%p, state=%s), not idle\n", - dccp_role(sk), sk, - ccid3_tx_state_name(hctx->ccid3hctx_state)); + if (hctx->ccid3hctx_p == 0 || + (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) { - /* - * Modify the cached value of X_recv [RFC 3448, 4.4] - * - * If (p == 0 || X_calc > 2 * X_recv) - * X_recv = max(X_recv / 2, s / (2 * t_mbi)); - * Else - * X_recv = X_calc / 4; - * - * Note that X_recv is scaled by 2^6 while X_calc is not - */ - BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); - - if (hctx->ccid3hctx_p == 0 || - (hctx->ccid3hctx_x_calc > - (hctx->ccid3hctx_x_recv >> 5))) { - - hctx->ccid3hctx_x_recv = - max(hctx->ccid3hctx_x_recv / 2, - (((__u64)hctx->ccid3hctx_s) << 6) / - (2 * TFRC_T_MBI)); - - if (hctx->ccid3hctx_p == 0) - dccp_timestamp(sk, &now); - } else { - hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; - hctx->ccid3hctx_x_recv <<= 4; - } - /* Now recalculate X [RFC 3448, 4.3, step (4)] */ - ccid3_hc_tx_update_x(sk, &now); + hctx->ccid3hctx_x_recv = + max(hctx->ccid3hctx_x_recv / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + (2 * TFRC_T_MBI)); + + if (hctx->ccid3hctx_p == 0) + dccp_timestamp(sk, &now); + } else { + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; + hctx->ccid3hctx_x_recv <<= 4; } + /* Now recalculate X [RFC 3448, 4.3, step (4)] */ + ccid3_hc_tx_update_x(sk, &now); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) @@ -280,8 +294,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) goto out; } - hctx->ccid3hctx_idle = 1; - restart_timer: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(t_nfb)); @@ -322,24 +334,35 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - - /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */ - ccid3_hc_tx_update_s(hctx, skb->len); - hctx->ccid3hctx_x = hctx->ccid3hctx_s; - hctx->ccid3hctx_x <<= 6; - - /* First timeout, according to [RFC 3448, 4.2], is 1 second */ - hctx->ccid3hctx_t_ipi = USEC_PER_SEC; - /* Initial delta: minimum of 0.5 sec and t_gran/2 */ - hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN; /* Set t_0 for initial packet */ hctx->ccid3hctx_t_nom = now; + + hctx->ccid3hctx_s = skb->len; + + /* + * Use initial RTT sample when available: recommended by erratum + * to RFC 4342. This implements the initialisation procedure of + * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6. + */ + if (dp->dccps_syn_rtt) { + ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); + hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; + hctx->ccid3hctx_x = rfc3390_initial_rate(sk); + hctx->ccid3hctx_t_ld = now; + } else { + /* Sender does not have RTT sample: X = MSS/second */ + hctx->ccid3hctx_x = dp->dccps_mss_cache; + hctx->ccid3hctx_x <<= 6; + } + ccid3_update_send_interval(hctx); + + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); + ccid3_pr_debug("delay=%ld\n", (long)delay); /* * Scheduling of packet transmissions [RFC 3448, 4.6] * @@ -361,6 +384,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) /* prepare to send now (add options etc.) */ dp->dccps_hc_tx_insert_options = 1; DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + hctx->ccid3hctx_idle = 0; /* set the nominal send time for the next following packet */ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); @@ -391,7 +415,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; - hctx->ccid3hctx_idle = 0; } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -402,8 +425,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_tx_hist_entry *packet; struct timeval now; unsigned long t_nfb; - u32 pinv; - suseconds_t r_sample, t_elapsed; + u32 pinv, r_sample; BUG_ON(hctx == NULL); @@ -445,18 +467,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * Calculate new round trip sample as per [RFC 3448, 4.3] by * R_sample = (now - t_recvdata) - t_elapsed */ - r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); - t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; - - DCCP_BUG_ON(r_sample < 0); - if (unlikely(r_sample <= t_elapsed)) - DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n", - (int)r_sample, (int)t_elapsed); - else - r_sample -= t_elapsed; - CCID3_RTT_SANITY_CHECK(r_sample); + r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp); - /* Update RTT estimate by + /* + * Update RTT estimate by * If (No feedback recv) * R = R_sample; * Else @@ -467,27 +481,23 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { /* * Larger Initial Windows [RFC 4342, sec. 5] - * We deviate in that we use `s' instead of `MSS'. */ - __u64 w_init = min(4 * hctx->ccid3hctx_s, - max(2 * hctx->ccid3hctx_s, 4380)); hctx->ccid3hctx_rtt = r_sample; - hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample); + hctx->ccid3hctx_x = rfc3390_initial_rate(sk); hctx->ccid3hctx_t_ld = now; - ccid3_update_send_time(hctx); + ccid3_update_send_interval(hctx); - ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, " - "R_sample=%dus, X=%u\n", dccp_role(sk), + ccid3_pr_debug("%s(%p), s=%u, MSS=%u, " + "R_sample=%uus, X=%u\n", dccp_role(sk), sk, hctx->ccid3hctx_s, - (unsigned long long)w_init, - (int)r_sample, + dp->dccps_mss_cache, r_sample, (unsigned)(hctx->ccid3hctx_x >> 6)); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); } else { hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt + - (u32)r_sample) / 10; + r_sample) / 10; /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ if (hctx->ccid3hctx_p > 0) @@ -497,10 +507,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_p); ccid3_hc_tx_update_x(sk, &now); - ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, " + ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " "p=%u, X_calc=%u, X_recv=%u, X=%u\n", dccp_role(sk), - sk, hctx->ccid3hctx_rtt, (int)r_sample, + sk, hctx->ccid3hctx_rtt, r_sample, hctx->ccid3hctx_s, hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, (unsigned)(hctx->ccid3hctx_x_recv >> 6), @@ -644,10 +654,50 @@ static void ccid3_hc_tx_exit(struct sock *sk) dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); } +static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) +{ + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return; + + BUG_ON(hctx == NULL); + + info->tcpi_rto = hctx->ccid3hctx_t_rto; + info->tcpi_rtt = hctx->ccid3hctx_rtt; +} + +static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_TX_INFO: + if (len < sizeof(hctx->ccid3hctx_tfrc)) + return -EINVAL; + len = sizeof(hctx->ccid3hctx_tfrc); + val = &hctx->ccid3hctx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + /* - * RX Half Connection methods + * Receiver Half-Connection Routines */ - #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) { @@ -977,8 +1027,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; - u32 p_prev, rtt_prev; - suseconds_t r_sample, t_elapsed; + u32 p_prev, r_sample, rtt_prev; int loss, payload_size; BUG_ON(hcrx == NULL); @@ -994,17 +1043,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; rtt_prev = hcrx->ccid3hcrx_rtt; dccp_timestamp(sk, &now); - timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); - r_sample = timeval_usecs(&now); - t_elapsed = opt_recv->dccpor_elapsed_time * 10; - - DCCP_BUG_ON(r_sample < 0); - if (unlikely(r_sample <= t_elapsed)) - DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n", - (long)r_sample, (long)t_elapsed); - else - r_sample -= t_elapsed; - CCID3_RTT_SANITY_CHECK(r_sample); + r_sample = dccp_sample_rtt(sk, &now, NULL); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -1132,20 +1171,6 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; } -static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) -{ - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - - /* Listen socks doesn't have a private CCID block */ - if (sk->sk_state == DCCP_LISTEN) - return; - - BUG_ON(hctx == NULL); - - info->tcpi_rto = hctx->ccid3hctx_t_rto; - info->tcpi_rtt = hctx->ccid3hctx_rtt; -} - static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { @@ -1173,33 +1198,6 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, return 0; } -static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - const void *val; - - /* Listen socks doesn't have a private CCID block */ - if (sk->sk_state == DCCP_LISTEN) - return -EINVAL; - - switch (optname) { - case DCCP_SOCKOPT_CCID_TX_INFO: - if (len < sizeof(hctx->ccid3hctx_tfrc)) - return -EINVAL; - len = sizeof(hctx->ccid3hctx_tfrc); - val = &hctx->ccid3hctx_tfrc; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen) || copy_to_user(optval, val, len)) - return -EFAULT; - - return 0; -} - static struct ccid_operations ccid3 = { .ccid_id = DCCPC_CCID3, .ccid_name = "ccid3", diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 15776a88c090..8d31b389c19c 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -51,16 +51,6 @@ /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ #define TFRC_T_MBI 64 -/* What we think is a reasonable upper limit on RTT values */ -#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC)) - -#define CCID3_RTT_SANITY_CHECK(rtt) do { \ - if (rtt > CCID3_SANE_RTT_MAX) { \ - DCCP_CRIT("RTT (%d) too large, substituting %d", \ - (int)rtt, (int)CCID3_SANE_RTT_MAX); \ - rtt = CCID3_SANE_RTT_MAX; \ - } } while (0) - enum ccid3_options { TFRC_OPT_LOSS_EVENT_RATE = 192, TFRC_OPT_LOSS_INTERVALS = 193, diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 0a0baef16b3e..372d7e75cdd8 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -91,7 +91,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) u32 w_tot = 0; list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { - if (li_entry->dccplih_interval != ~0) { + if (li_entry->dccplih_interval != ~0U) { i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; w_tot += dccp_li_hist_w[i]; if (i != 0) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e33a9edb4036..d8ad27bfe01a 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -31,13 +31,9 @@ __stringify(cond)); \ } while (0) -#ifdef MODULE #define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \ printk(fmt, ##args); \ } while(0) -#else -#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args) -#endif #define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \ "%s: " fmt, __FUNCTION__, ##a) @@ -75,11 +71,15 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); /* RFC 1122, 4.2.3.1 initial RTO value */ #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) +#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ + +/* bounds for sampled RTT values from packet exchanges (in usec) */ +#define DCCP_SANE_RTT_MIN 100 +#define DCCP_SANE_RTT_MAX (4 * USEC_PER_SEC) + /* Maximal interval between probes for local resources. */ #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) -#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ - /* sysctl variables for DCCP */ extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; @@ -92,17 +92,43 @@ extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; +/* + * 48-bit sequence number arithmetic (signed and unsigned) + */ +#define INT48_MIN 0x800000000000LL /* 2^47 */ +#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */ +#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */ +#define TO_SIGNED48(x) (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x))) +#define TO_UNSIGNED48(x) (((x) >= 0)? (x) : COMPLEMENT48(-(x))) +#define ADD48(a, b) (((a) + (b)) & UINT48_MAX) +#define SUB48(a, b) ADD48((a), COMPLEMENT48(b)) + +static inline void dccp_set_seqno(u64 *seqno, u64 value) +{ + *seqno = value & UINT48_MAX; +} + +static inline void dccp_inc_seqno(u64 *seqno) +{ + *seqno = ADD48(*seqno, 1); +} + +/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */ +static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2) +{ + u64 delta = SUB48(seqno2, seqno1); + + return TO_SIGNED48(delta); +} + /* is seq1 < seq2 ? */ static inline int before48(const u64 seq1, const u64 seq2) { - return (s64)((seq1 << 16) - (seq2 << 16)) < 0; + return (s64)((seq2 << 16) - (seq1 << 16)) > 0; } /* is seq1 > seq2 ? */ -static inline int after48(const u64 seq1, const u64 seq2) -{ - return (s64)((seq2 << 16) - (seq1 << 16)) < 0; -} +#define after48(seq1, seq2) before48(seq2, seq1) /* is seq2 <= seq1 <= seq3 ? */ static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) @@ -118,9 +144,7 @@ static inline u64 max48(const u64 seq1, const u64 seq2) /* is seq1 next seqno after seq2 */ static inline int follows48(const u64 seq1, const u64 seq2) { - int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF); - - return diff==1; + return dccp_delta_seqno(seq2, seq1) == 1; } enum { @@ -272,6 +296,8 @@ extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); extern int dccp_invalid_packet(struct sk_buff *skb); +extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, + struct timeval *t_history); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -313,26 +339,7 @@ static inline int dccp_packet_without_ack(const struct sk_buff *skb) return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; } -#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) -#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2) - -static inline void dccp_set_seqno(u64 *seqno, u64 value) -{ - if (value > DCCP_MAX_SEQNO) - value -= DCCP_MAX_SEQNO + 1; - *seqno = value; -} - -static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2) -{ - return ((seqno2 << 16) - (seqno1 << 16)) >> 16; -} - -static inline void dccp_inc_seqno(u64 *seqno) -{ - if (++*seqno > DCCP_MAX_SEQNO) - *seqno = 0; -} +#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2) static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) { diff --git a/net/dccp/input.c b/net/dccp/input.c index 78b043c458bf..da6ec185ed5b 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -86,7 +86,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) dh->dccph_type == DCCP_PKT_SYNCACK) { if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && - !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) + dccp_delta_seqno(dp->dccps_swl, + DCCP_SKB_CB(skb)->dccpd_seq) >= 0) dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); else return -1; @@ -203,7 +204,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dp->dccps_role != DCCP_ROLE_CLIENT) goto send_sync; check_seq: - if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { + if (dccp_delta_seqno(dp->dccps_osr, + DCCP_SKB_CB(skb)->dccpd_seq) >= 0) { send_sync: dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); @@ -298,6 +300,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dccp_parse_options(sk, skb)) goto out_invalid_packet; + /* Obtain RTT sample from SYN exchange (used by CCID 3) */ + if (dp->dccps_options_received.dccpor_timestamp_echo) { + struct timeval now; + + dccp_timestamp(sk, &now); + dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL); + } + if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, @@ -575,3 +585,43 @@ discard: } EXPORT_SYMBOL_GPL(dccp_rcv_state_process); + +/** + * dccp_sample_rtt - Sample RTT from packet exchange + * + * @sk: connected dccp_sock + * @t_recv: receive timestamp of packet with timestamp echo + * @t_hist: packet history timestamp or NULL + */ +u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, + struct timeval *t_hist) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_options_received *or = &dp->dccps_options_received; + suseconds_t delta; + + if (t_hist == NULL) { + if (!or->dccpor_timestamp_echo) { + DCCP_WARN("packet without timestamp echo\n"); + return DCCP_SANE_RTT_MAX; + } + timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10); + delta = timeval_usecs(t_recv); + } else + delta = timeval_delta(t_recv, t_hist); + + delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */ + + if (unlikely(delta <= 0)) { + DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta); + return DCCP_SANE_RTT_MIN; + } + if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) { + DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta); + return DCCP_SANE_RTT_MAX; + } + + return delta; +} + +EXPORT_SYMBOL_GPL(dccp_sample_rtt); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4a83978aa660..718f2fa923a1 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -207,8 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) (iph->ihl << 2)); struct dccp_sock *dp; struct inet_sock *inet; - const int type = skb->h.icmph->type; - const int code = skb->h.icmph->code; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; struct sock *sk; __u64 seq; int err; @@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_send_check); static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) { - return secure_dccp_sequence_number(skb->nh.iph->daddr, - skb->nh.iph->saddr, + return secure_dccp_sequence_number(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, dccp_hdr(skb)->dccph_dport, dccp_hdr(skb)->dccph_sport); } @@ -405,7 +405,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); - newinet->mc_ttl = skb->nh.iph->ttl; + newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->id = jiffies; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -428,7 +428,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); - const struct iphdr *iph = skb->nh.iph; + const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. */ @@ -460,8 +460,8 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, struct rtable *rt; struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, .nl_u = { .ip4_u = - { .daddr = skb->nh.iph->saddr, - .saddr = skb->nh.iph->daddr, + { .daddr = ip_hdr(skb)->saddr, + .saddr = ip_hdr(skb)->daddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, .uli_u = { .ports = @@ -513,6 +513,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const struct iphdr *rxiph; const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_reset); @@ -559,13 +560,13 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr, - rxskb->nh.iph->daddr); + rxiph = ip_hdr(rxskb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, + rxiph->daddr); bh_lock_sock(dccp_v4_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); + rxiph->daddr, rxiph->saddr, NULL); bh_unlock_sock(dccp_v4_ctl_socket->sk); if (net_xmit_eval(err) == 0) { @@ -640,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq = inet_rsk(req); - ireq->loc_addr = skb->nh.iph->daddr; - ireq->rmt_addr = skb->nh.iph->saddr; + ireq->loc_addr = ip_hdr(skb)->daddr; + ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->opt = NULL; /* @@ -809,6 +810,7 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet); static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; + const struct iphdr *iph; struct sock *sk; int min_cov; @@ -817,8 +819,9 @@ static int dccp_v4_rcv(struct sk_buff *skb) if (dccp_invalid_packet(skb)) goto discard_it; + iph = ip_hdr(skb); /* Step 1: If header checksum is incorrect, drop packet and return */ - if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) { + if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) { DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } @@ -832,8 +835,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) "src=%u.%u.%u.%u@%-5d " "dst=%u.%u.%u.%u@%-5d seq=%llu", dccp_packet_name(dh->dccph_type), - NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), - NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), + NIPQUAD(iph->saddr), ntohs(dh->dccph_sport), + NIPQUAD(iph->daddr), ntohs(dh->dccph_dport), (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); if (dccp_packet_without_ack(skb)) { @@ -848,10 +851,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, - skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, dh->dccph_dport, - inet_iif(skb)); - + iph->saddr, dh->dccph_sport, + iph->daddr, dh->dccph_dport, inet_iif(skb)); /* * Step 2: * If no socket ... diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 7f51e8db3967..64eac2515aa2 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -84,8 +84,8 @@ static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) { - return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, - skb->nh.ipv6h->saddr.s6_addr32, + return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, dccp_hdr(skb)->dccph_dport, dccp_hdr(skb)->dccph_sport ); @@ -261,8 +261,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, if (rxopt->srcrt) opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(pktopts->nh.raw + - rxopt->srcrt)); + (struct ipv6_rt_hdr *)(skb_network_header(pktopts) + + rxopt->srcrt)); } if (opt != NULL && opt->srcrt != NULL) { @@ -313,6 +313,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + struct ipv6hdr *rxip6h; const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_reset); @@ -352,12 +353,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr, - &rxskb->nh.ipv6h->daddr); + rxip6h = ipv6_hdr(rxskb); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, + &rxip6h->daddr); memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); - ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); + ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); + ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); fl.proto = IPPROTO_DCCP; fl.oif = inet6_iif(rxskb); @@ -390,7 +392,7 @@ static struct request_sock_ops dccp6_request_sock_ops = { static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); - const struct ipv6hdr *iph = skb->nh.ipv6h; + const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. */ @@ -460,8 +462,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq6 = inet6_rsk(req); - ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); - ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr); + ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || @@ -546,7 +548,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = skb->nh.ipv6h->hop_limit; + newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -573,8 +575,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (rxopt->srcrt) opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw + - rxopt->srcrt)); + (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) + + rxopt->srcrt)); } if (dst == NULL) { @@ -653,7 +655,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, } newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = skb->nh.ipv6h->hop_limit; + newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) @@ -826,8 +828,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb) goto discard_it; /* Step 1: If header checksum is incorrect, drop packet and return. */ - if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr)) { + if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr)) { DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } @@ -844,9 +846,9 @@ static int dccp_v6_rcv(struct sk_buff **pskb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr, + sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr, dh->dccph_sport, - &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport), + &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), inet6_iif(skb)); /* * Step 2: diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 6d235b3013dd..e18e249ac49b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -27,7 +27,7 @@ struct inet_timewait_death_row dccp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = SPIN_LOCK_UNLOCKED, + .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock), .hashinfo = &dccp_hashinfo, .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, (unsigned long)&dccp_death_row), diff --git a/net/dccp/options.c b/net/dccp/options.c index ca13f7731994..34d536d5f1a1 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -29,8 +29,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window); - void dccp_minisock_init(struct dccp_minisock *dmsk) { dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; @@ -174,21 +172,25 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value); dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, " - "ackno=%llu, ", dccp_role(sk), + "ackno=%llu", dccp_role(sk), opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); - if (len == 4) + if (len == 4) { + dccp_pr_debug_cat("\n"); break; + } if (len == 6) elapsed_time = ntohs(*(__be16 *)(value + 4)); else elapsed_time = ntohl(*(__be32 *)(value + 4)); + dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time); + /* Give precedence to the biggest ELAPSED_TIME */ if (elapsed_time > opt_recv->dccpor_elapsed_time) opt_recv->dccpor_elapsed_time = elapsed_time; @@ -565,6 +567,14 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) dccp_insert_options_feat(sk, skb)) return -1; + /* + * Obtain RTT sample from Request/Response exchange. + * This is currently used in CCID 3 initialisation. + */ + if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST && + dccp_insert_option_timestamp(sk, skb)) + return -1; + /* XXX: insert other options when appropriate */ if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { diff --git a/net/dccp/output.c b/net/dccp/output.c index aa21cc4de37f..c8d843e983fc 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -194,6 +194,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb) rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); if (rc <= 0) break; + dccp_pr_debug("delayed send by %d msec\n", rc); delay = msecs_to_jiffies(rc); sk->sk_write_pending++; release_sock(sk); @@ -255,7 +256,7 @@ void dccp_write_xmit(struct sock *sk, int block) DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", err); } else { - dccp_pr_debug("packet discarded\n"); + dccp_pr_debug("packet discarded due to err=%d\n", err); kfree_skb(skb); } } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 3b1f509f51dd..1f5e3ba62065 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -90,15 +90,18 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, if (port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) { if (hctx) - printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport), size, - hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi); + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u " + "%llu %llu %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size, + hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, + hctx->ccid3hctx_x_recv >> 6, + hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi); else printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport), size); + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size); } jprobe_return(); |